library(tidyverse)
Registered S3 methods overwritten by 'dbplyr':
method from
print.tbl_lazy
print.tbl_sql
[30m── [1mAttaching packages[22m ─────────────────────────────── tidyverse 1.3.1 ──[39m
[30m[32m✓[30m [34mggplot2[30m 3.3.5 [32m✓[30m [34mpurrr [30m 0.3.4
[32m✓[30m [34mtibble [30m 3.1.3 [32m✓[30m [34mdplyr [30m 1.0.7
[32m✓[30m [34mtidyr [30m 1.1.3 [32m✓[30m [34mstringr[30m 1.4.0
[32m✓[30m [34mreadr [30m 1.4.0 [32m✓[30m [34mforcats[30m 0.5.1[39m
[30m── [1mConflicts[22m ────────────────────────────────── tidyverse_conflicts() ──
[31mx[30m [34mdplyr[30m::[32mfilter()[30m masks [34mstats[30m::filter()
[31mx[30m [34mdplyr[30m::[32mlag()[30m masks [34mstats[30m::lag()[39m
library(phyloseq)
Registered S3 method overwritten by 'data.table':
method from
print.data.table
library(phangorn)
Loading required package: ape
library(readr)
library(ape)
library(vegan)
Loading required package: permute
Loading required package: lattice
This is vegan 2.5-7
Attaching package: ‘vegan’
The following objects are masked from ‘package:phangorn’:
diversity, treedist
library(RColorBrewer)
library(microbiome)
microbiome R package (microbiome.github.com)
Copyright (C) 2011-2020 Leo Lahti,
Sudarshan Shetty et al. <microbiome.github.io>
Attaching package: ‘microbiome’
The following object is masked from ‘package:vegan’:
diversity
The following object is masked from ‘package:phangorn’:
diversity
The following object is masked from ‘package:ggplot2’:
alpha
The following object is masked from ‘package:base’:
transform
library(compositions)
Welcome to compositions, a package for compositional data analysis.
Find an intro with "? compositions"
Attaching package: ‘compositions’
The following object is masked from ‘package:ape’:
balance
The following objects are masked from ‘package:stats’:
cor, cov, dist, var
The following objects are masked from ‘package:base’:
%*%, norm, scale, scale.default
library(SpiecEasi)
Attaching package: ‘SpiecEasi’
The following objects are masked from ‘package:compositions’:
alr, clr
library(otuSummary)
library(psych)
Attaching package: ‘psych’
The following objects are masked from ‘package:SpiecEasi’:
cor2cov, shannon
The following objects are masked from ‘package:compositions’:
ellipses, pairwisePlot
The following object is masked from ‘package:microbiome’:
alpha
The following objects are masked from ‘package:ggplot2’:
%+%, alpha
library(Matrix)
Attaching package: ‘Matrix’
The following objects are masked from ‘package:SpiecEasi’:
tril, triu
The following objects are masked from ‘package:tidyr’:
expand, pack, unpack
library(igraph)
Attaching package: ‘igraph’
The following object is masked from ‘package:SpiecEasi’:
make_graph
The following object is masked from ‘package:compositions’:
normalize
The following object is masked from ‘package:microbiome’:
diversity
The following object is masked from ‘package:vegan’:
diversity
The following object is masked from ‘package:permute’:
permute
The following object is masked from ‘package:phangorn’:
diversity
The following objects are masked from ‘package:ape’:
edges, mst, ring
The following objects are masked from ‘package:dplyr’:
as_data_frame, groups, union
The following objects are masked from ‘package:purrr’:
compose, simplify
The following object is masked from ‘package:tidyr’:
crossing
The following object is masked from ‘package:tibble’:
as_data_frame
The following objects are masked from ‘package:stats’:
decompose, spectrum
The following object is masked from ‘package:base’:
union
library(plotly)
Registered S3 methods overwritten by 'htmltools':
method from
print.html tools:rstudio
print.shiny.tag tools:rstudio
print.shiny.tag.list tools:rstudio
Registered S3 method overwritten by 'htmlwidgets':
method from
print.htmlwidget tools:rstudio
Attaching package: ‘plotly’
The following object is masked from ‘package:igraph’:
groups
The following object is masked from ‘package:ggplot2’:
last_plot
The following object is masked from ‘package:stats’:
filter
The following object is masked from ‘package:graphics’:
layout
library(egg)
Loading required package: gridExtra
Attaching package: ‘gridExtra’
The following object is masked from ‘package:dplyr’:
combine
library(ggvegan)
# Report versions of packages
sessionInfo()
R version 4.0.2 (2020-06-22)
Platform: x86_64-apple-darwin17.0 (64-bit)
Running under: macOS Catalina 10.15.7
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.0/Resources/lib/libRlapack.dylib
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
attached base packages:
[1] stats graphics grDevices utils datasets methods
[7] base
other attached packages:
[1] ggvegan_0.1-0 egg_0.4.5 gridExtra_2.3
[4] plotly_4.9.2.2 igraph_1.2.6 Matrix_1.3-0
[7] psych_2.1.3 otuSummary_0.1.1 SpiecEasi_1.1.1
[10] compositions_2.0-0 microbiome_1.10.0 RColorBrewer_1.1-2
[13] vegan_2.5-7 lattice_0.20-41 permute_0.9-5
[16] phangorn_2.5.5 ape_5.4-1 phyloseq_1.32.0
[19] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.7
[22] purrr_0.3.4 readr_1.4.0 tidyr_1.1.3
[25] tibble_3.1.3 ggplot2_3.3.5 tidyverse_1.3.1
loaded via a namespace (and not attached):
[1] Rtsne_0.15 VGAM_1.1-5 colorspace_2.0-2
[4] ellipsis_0.3.2 XVector_0.28.0 fs_1.5.0
[7] rstudioapi_0.13 ggrepel_0.9.1 fansi_0.5.0
[10] lubridate_1.7.10 xml2_1.3.2 codetools_0.2-18
[13] splines_4.0.2 mnormt_2.0.2 robustbase_0.93-6
[16] knitr_1.30 ade4_1.7-16 jsonlite_1.7.2
[19] broom_0.7.9 cluster_2.1.0 dbplyr_2.1.1
[22] compiler_4.0.2 httr_1.4.2 backports_1.2.1
[25] assertthat_0.2.1 lazyeval_0.2.2 cli_3.0.1
[28] htmltools_0.5.1.1 prettyunits_1.1.1 tools_4.0.2
[31] gtable_0.3.0 glue_1.4.2 reshape2_1.4.4
[34] fastmatch_1.1-0 Rcpp_1.0.7 Biobase_2.48.0
[37] cellranger_1.1.0 vctrs_0.3.8 Biostrings_2.56.0
[40] multtest_2.44.0 nlme_3.1-151 iterators_1.0.13
[43] tensorA_0.36.2 xfun_0.24 rvest_1.0.1
[46] lifecycle_1.0.0 DEoptimR_1.0-8 zlibbioc_1.34.0
[49] MASS_7.3-53 scales_1.1.1 hms_1.1.0
[52] parallel_4.0.2 biomformat_1.16.0 rhdf5_2.32.4
[55] huge_1.3.4.1 stringi_1.7.3 S4Vectors_0.26.1
[58] foreach_1.5.1 BiocGenerics_0.34.0 shape_1.4.6
[61] rlang_0.4.11 pkgconfig_2.0.3 Rhdf5lib_1.10.1
[64] htmlwidgets_1.5.3 tidyselect_1.1.1 plyr_1.8.6
[67] magrittr_2.0.1 R6_2.5.0 IRanges_2.22.2
[70] generics_0.1.0 DBI_1.1.1 pillar_1.6.2
[73] haven_2.3.1 withr_2.4.2 mgcv_1.8-33
[76] survival_3.2-7 bayesm_3.1-4 modelr_0.1.8
[79] pulsar_0.3.7 crayon_1.4.1 utf8_1.2.2
[82] tmvnsim_1.0-2 progress_1.2.2 grid_4.0.2
[85] readxl_1.3.1 data.table_1.13.4 reprex_2.0.1
[88] digest_0.6.27 stats4_4.0.2 munsell_0.5.0
[91] glmnet_4.1-1 viridisLite_0.4.0 quadprog_1.5-8
Metadata:
# Sample metadata table; later steps rely on its `Sample Name` and `Replicate` columns
metadata <- read_csv("Metadata.csv")
Import SRA table and match SRA IDs with sample IDs in metadata file
# Read the SRA run table, then attach its columns to the metadata by matching
# on the shared `Sample Name` column (every metadata row is kept)
SRARunTable <- read_csv("sra_data/SraRunTable.txt")
metadata <- metadata %>%
  left_join(SRARunTable, by = "Sample Name")
DADA2 results:
# Import the DADA2 ASV count table and move its first column into the row names.
# NOTE(review): an earlier comment said the first row of the file is skipped,
# but no `skip` argument is passed to read_tsv() -- confirm which is intended.
count_table <- read_tsv(file = "dada2_export/ASVs_counts.tsv") %>%
  column_to_rownames(var = colnames(.)[1])
# Import the ASV taxonomy the same way (first column becomes the row names)
taxonomy <- read_tsv(file = "dada2_export/ASVs_taxonomy.tsv") %>%
  column_to_rownames(var = colnames(.)[1])
# Rarefaction curves with vegan::rarecurve(). The count table is converted to a
# plain data frame and transposed with t() before being passed in.
count_table_df <- as.data.frame(count_table)
# One labelled curve per sample, evaluated every 100 reads.
# (Can be slow to render when all samples are drawn at once.)
rarecurve(t(count_table_df), step = 100, cex = 0.5, ylab = "ASVs", label = TRUE)
# Drop singletons: keep only ASVs whose total count across all samples is > 1.
# dplyr::filter is named explicitly because stats::filter is masked more than
# once while the packages above are attached.
count_table_no_singletons <- dplyr::filter(count_table, rowSums(count_table) > 1)
# retains all ASVs (out of 14176)
Next, change the sample names from NCBI run IDs to our internal sample IDs
# The columns of count_table_no_singletons are named by NCBI SRA run accession.
# Rename them to our internal sample IDs using the Run -> `Sample Name` key
# taken from the SRA run table.
key <- SRARunTable %>% select(Run, "Sample Name")
# Look the names up with match() so the counts themselves are never coerced to
# character and re-parsed (the previous cbind/t()/type_convert round trip did that).
run_ids <- colnames(count_table_no_singletons)
sample_ids <- key[["Sample Name"]][match(run_ids, key$Run)]
# Fail early rather than silently producing NA column names
if (anyNA(sample_ids)) {
  stop(
    "No 'Sample Name' found in the SRA run table for run(s): ",
    paste(run_ids[is.na(sample_ids)], collapse = ", "),
    call. = FALSE
  )
}
# Same counts and ASV row names as before, with samples renamed
count_table_2 <- as.data.frame(count_table_no_singletons)
colnames(count_table_2) <- sample_ids
This process takes a LONG time, so run it once and save the result as an .RData object. The DADA2 tools have no option to build a tree (unlike QIIME 2), but we can build one here using DECIPHER and phangorn
(Based on https://f1000research.com/articles/5-1492/v2)
Make an alignment using tools from Decipher (Note- alignment step takes several hours. Commented out for now. Only need to run once)
## import fasta
# fas <- "dada2_export/ASVs.fa"
# seqs <- readDNAStringSet(fas)
# seqs
#
# # perform the alignment
# aligned <- AlignSeqs(seqs) # automatically detects and uses all cores
#
# # view the alignment in a browser (optional)
# BrowseSeqs(aligned, highlight=0)
#
# # write out aligned sequence file
# writeXStringSet(aligned, file="ASVs.aligned.fasta")
Use phangorn package to build tree. Here we are building a maximum likelihood neighbor-joining tree. (Also takes a while to run. Comment out for now.)
# phang.align <- phyDat(as(aligned, "matrix"), type="DNA") # convert to phyDat format
# dm <- dist.ml(phang.align) # calculate pairwise distance matrix
# treeNJ <- NJ(dm) # perform neighbor-joining tree method
# fit = pml(treeNJ, data=phang.align) # compute intermal max likelihood
Since the step above takes a long time, save all variables up to this point in environment as RData object
# save.image("EnvironmentBackups/CariacoEuks_postanalysis_vars_upto_tree.RData")
Re-load
# Restore the workspace saved after the (commented-out) tree-building step;
# presumably this is where `fit`, used below for the tree, comes from.
# NOTE(review): load() also replaces any same-named objects created above with
# their saved versions -- confirm the backup matches the current input files.
load("EnvironmentBackups/CariacoEuks_postanalysis_vars_upto_tree.RData")
Here we will do ordinations using the phyloseq package, which first requires making phyloseq objects out of each of our input data tables (in the last tutorial, I imported the tree using phyloseq so it is already a phyloseq object)
# Wrap each input in its phyloseq component class.
# Counts: ASVs in rows, samples in columns
ASV <- otu_table(count_table_2, taxa_are_rows = TRUE)
# Taxonomy as a character matrix
TAX <- tax_table(as.matrix(taxonomy))
# Sample metadata, with the internal sample IDs as row names
META <- sample_data(data.frame(metadata, row.names = metadata[["Sample Name"]]))
# Tree component of the `fit` object
TREE <- phy_tree(fit$tree)
First check that the inputs are in compatible formats by checking for ASV names with the phyloseq function, taxa_names
# Peek at the first few ASV names of each component; they should agree
TAX %>% taxa_names() %>% head()
ASV %>% taxa_names() %>% head()
TREE %>% taxa_names() %>% head()
And check sample names were also detected
# First few sample names in the count table and in the sample metadata
ASV %>% sample_names() %>% head()
META %>% sample_names() %>% head()
And make the phyloseq object
# Combine counts, taxonomy, sample metadata and tree into one phyloseq object
ps <- phyloseq(ASV, TAX, META, TREE)
Check some features of the phyloseq object
# List the taxonomic ranks stored in the phyloseq object
rank_names(ps)
# Tally ASVs per Supergroup; exclude = NULL keeps missing annotations in the tally
table(tax_table(ps)[, "Supergroup"], exclude = NULL)
# Distinct Supergroup values, to spot empty or placeholder annotations
unique(tax_table(ps)[, "Supergroup"])
Filter out those ambiguous Supergroup annotations - losing 471 ASVs
# Drop ASVs whose Supergroup is missing, empty, or the literal string "NA"
ps <- subset_taxa(ps, !(is.na(Supergroup) | Supergroup %in% c("", "NA")))
# Re-tabulate to confirm the ambiguous annotations are gone
table(tax_table(ps)[, "Supergroup"], exclude = NULL)
Check out the Division names
# Tally ASVs per Division, keeping missing values as their own category
table(tax_table(ps)[, "Division"], exclude = NULL)
Filter out any with “NA” as Division
# Drop ASVs whose Division is missing or empty.
# NOTE(review): unlike the Supergroup filter, the literal string "NA" is not
# excluded here -- confirm that is intended.
ps <- subset_taxa(ps, !(is.na(Division) | Division %in% c("")))
# Re-tabulate to confirm
table(tax_table(ps)[, "Division"], exclude = NULL)
After the above, 13,427 ASVs remain from the original 14,177
Eliminate the libraries that didn’t have many sequences, AE3a198A, AE3b314A, AE2a200A, AE2b900AN, AE2a200B, AE2a267B, AE2a900BN
# Libraries with too few sequences to keep
low_depth_samples <- c(
  "AE3a198A", "AE3b314A", "AE2a200A", "AE2b900AN",
  "AE2a200B", "AE2a267B", "AE2a900BN"
)
# Logical mask over SAMPLES (despite the variable's name): TRUE = keep
taxa_to_keep <- !(sample_names(ps) %in% low_depth_samples)
ps <- prune_samples(taxa_to_keep, ps)
41 samples remain and still 13,427 ASVs
Check the rarefaction curves again to make sure those low-sequencing-effort samples have been removed
# Rarefaction curves for the samples that remain after pruning
# (TRUE is spelled out: `T` is an ordinary variable that can be reassigned)
rarecurve(t(otu_table(ps)), step = 100, cex = 0.5, ylab = "ASVs", label = TRUE)
Re-root the tree. This is needed because the root of the tree may have been removed when pruning. (The helper function below, found online, picks the longest terminal branch as the outgroup to root on.)
# first define function from link above to find furthest outgroup
pick_new_outgroup <- function(tree.unrooted) {
  # Return the label of the tip that sits on the longest terminal branch, for
  # use as the outgroup when rooting the tree.
  #
  # Args:
  #   tree.unrooted: a tree in ape's "phylo" layout, i.e. a list with `edge`
  #     (two-column matrix: parent node, child node), `edge.length` and
  #     `tip.label`.
  # Returns:
  #   A single tip label (character).
  #
  # The previous version took the first Ntip rows of the edge table and paired
  # them positionally with `tip.label`. Edge rows are not ordered by tip
  # (internal edges are interleaved), so a branch length could be attributed
  # to the wrong tip, or an internal branch could be picked. Terminal edges
  # are now identified explicitly. Only base R is used, so nothing is attached
  # as a side effect of calling the function.
  edges <- tree.unrooted$edge
  edge_lengths <- tree.unrooted$edge.length
  tip_labels <- tree.unrooted$tip.label
  if (is.null(edge_lengths)) {
    stop("tree has no branch lengths; cannot pick the longest terminal branch",
         call. = FALSE)
  }
  # In the phylo layout tips are numbered 1..Ntip and internal nodes above
  # that, so an edge is terminal exactly when its child node is <= Ntip.
  is_terminal <- edges[, 2] <= length(tip_labels)
  terminal_child <- edges[is_terminal, 2]
  terminal_length <- edge_lengths[is_terminal]
  # Take the longest terminal branch as outgroup
  tip_labels[terminal_child[which.max(terminal_length)]]
}
# Pick the outgroup on the tree currently stored in the phyloseq object
my.tree <- phy_tree(ps)
out.group <- pick_new_outgroup(my.tree)
out.group
# Root the tree on that tip and store the rooted tree back in the object
new.tree1 <- ape::root(my.tree, outgroup = out.group, resolve.root = TRUE)
phy_tree(ps) <- new.tree1
# Check if tree is binary (dichotomous not multichotomous).
# ape::is.binary() replaces the deprecated is.binary.tree().
ape::is.binary(phy_tree(ps))
# If FALSE, would have to run
# new.tree2 <- ape::multi2di(new.tree1)
# phy_tree(ps) <- new.tree2
# phy_tree(ps)
Check overall how the phyla are distributed among samples. Phyloseq makes this easy
# Collapse the ASVs to one taxon per Division with phyloseq::tax_glom()
DivisionGlommed <- tax_glom(ps, "Division")
# There are many Divisions, so build a palette with one colour per Division
# value by interpolating the 11-colour "Spectral" palette from RColorBrewer
division_tally <- table(tax_table(ps)[, "Division"], exclude = NULL)
colourCount <- length(division_tally)
getPalette <- colorRampPalette(brewer.pal(11, "Spectral"))
DivisionPalette <- getPalette(colourCount)
# Stacked bar chart of counts per sample, filled by Division
plot_bar(DivisionGlommed, x = "Sample", fill = "Division") +
  scale_fill_manual(values = DivisionPalette)
Plot compositional (relative abundances) instead of absolute abundance using microbiome::transform
# Convert counts to per-sample relative abundances
ps_ra <- microbiome::transform(ps, transform = "compositional")
# Quick look at the top-left corner of the transformed table
otu_table(ps_ra)[1:5, 1:5]
# Collapse the ASVs to one taxon per Division with phyloseq::tax_glom()
DivisionGlommed_RA <- tax_glom(ps_ra, "Division")
# Stacked bar chart of relative abundance per sample, filled by Division
Division_barplot <-
  plot_bar(DivisionGlommed_RA, x = "Sample", fill = "Division") +
  scale_fill_manual(values = DivisionPalette) +
  theme(legend.text = element_text(size = 10))
Division_barplot
# Export as a 15 x 5 inch EPS
ggsave(
  filename = "Figures/Division_barplot.eps",
  plot = Division_barplot,
  width = 15, height = 5, units = "in"
)
Lots of dinoflagellates and radiolaria. Makes sense. But the above is the distribution from all samples. Next make plots that indicate distributions across environmental gradients. Calculate averages and use bubble plots
Get average relative abundances from sample replicates
# Mean relative abundance per replicate group.
# The previous version chained ~24 mutate() calls and passed the full data
# frame again as an extra argument at every step, so all columns were
# re-spliced each time and the OTU table was converted dozens of times.
# Here the table is converted once and the replicate -> samples map is data.
ra_df <- data.frame(otu_table(ps_ra))
replicate_members <- list(
  "103A"  = c("AE3a103A", "AE3b103A"),
  "198A"  = c("AE3b198A"),               # Sample AE3a198A was removed
  "234A"  = c("AE3a234A", "AE3b234A"),
  "295A"  = c("AE3a295A", "AE3b295A"),
  "314A"  = c("AE3a314A"),               # Sample AE3b314A was removed
  "900AM" = c("AE3a900AM", "AE1b900AM"),
  "103B"  = c("AE3a103B", "AE3b103B"),
  "198B"  = c("AE3a198B", "AE3b198B"),
  "234B"  = c("AE3a234B", "AE3b234B"),
  "295B"  = c("AE3a295B", "AE3b295B"),
  "314B"  = c("AE3a314B", "AE3b314B"),
  "900BM" = c("AE3a900BM", "AE1b900BM"),
  "143A"  = c("AE2a143A", "AE2b143A"),
  "200A"  = c("AE2b200A"),               # AE2a200A was removed
  "237A"  = c("AE2a237A", "AE2b237A"),
  "247A"  = c("AE2a247A", "AE2b247A"),
  "267A"  = c("AE2a267A", "AE2b267A"),
  "900AN" = c("AE2a900AN"),              # AE2b900AN was removed
  "143B"  = c("AE2a143B", "AE2b143B"),
  "200B"  = c("AE2b200B"),               # AE2a200B was removed
  "237B"  = c("AE2a237B", "AE2b237B"),
  "247B"  = c("AE2a247B", "AE2b247B"),
  "267B"  = c("AE2b267B"),               # AE2a267B was removed
  "900BN" = c("AE2b900BN")               # AE2a900BN was removed
)
# Row-wise mean over each group's sample columns; drop = FALSE keeps
# single-sample groups as a one-column table so rowMeans() still applies.
# A missing sample column is an error ("undefined columns selected").
replicate_means <- lapply(replicate_members, function(samples) {
  rowMeans(ra_df[, samples, drop = FALSE], na.rm = TRUE)
})
# cbind() keeps the ASV row names and the replicate names as column names
otu_table_mean_ra <- as.data.frame(do.call(cbind, replicate_means))
# Keep the replicate columns in the order they appear in the metadata
otu_table_mean_ra <- otu_table_mean_ra[, unique(metadata$Replicate)]
otu_table_mean_ra
Make into new phyloseq object
# Per-replicate metadata: drop the per-sample columns (`Sample Name`, Type and
# everything that came from the SRA run table), then de-duplicate the rows
metadata2 <- metadata %>%
  select(!c("Sample Name", Type, colnames(SRARunTable))) %>%
  unique()
META2 <- sample_data(data.frame(metadata2, row.names = metadata2$Replicate))
# NOTE(review): this uses the original TAX and TREE objects, not the filtered
# taxonomy / re-rooted tree held in `ps` -- confirm that is intended.
mean_ra_otu <- otu_table(otu_table_mean_ra, taxa_are_rows = TRUE)
ps_ra_mean <- phyloseq(mean_ra_otu, TAX, TREE, META2)
# Collapse the ASVs to one taxon per Division with phyloseq::tax_glom()
ps_ra_mean_division <- tax_glom(ps_ra_mean, "Division")
# and check by bar plotting
plot_bar(ps_ra_mean_division, x = "Sample", fill = "Division") +
  scale_fill_manual(values = DivisionPalette)
Extract mean relative abundance, glommed by division, from the phyloseq object and pair it to taxonomic data
# Division-level mean relative abundances as a plain data frame
division_otu <- otu_table(ps_ra_mean_division)
division_df <- data.frame(division_otu)
# Restore the replicate names as column names
colnames(division_df) <- colnames(division_otu)
# Expose the representative ASV id as a column so taxonomy can be joined on it
division_df$ASV <- rownames(division_df)
otu_table_mean_ra <- division_df %>%
  left_join(as_tibble(taxonomy, rownames = "ASV"), by = "ASV")
otu_table_mean_ra
Some manual curating for plotting
# Add a combined "Supergroup Division" label column (space-separated)
otu_table_mean_ra <- otu_table_mean_ra %>%
  mutate(SupergroupDivision = paste(Supergroup, Division))
otu_table_mean_ra
Pivot longer
# One row per (taxon, replicate): gather the replicate columns into
# Replicate / Mean_RA pairs
replicate_cols <- unique(metadata$Replicate)
otu_table_mean_ra <- otu_table_mean_ra %>%
  pivot_longer(
    cols = all_of(replicate_cols),
    names_to = "Replicate",
    values_to = "Mean_RA"
  )
otu_table_mean_ra
Join metadata
# Per-replicate environmental metadata to attach to every row
replicate_metadata_cols <- c("Replicate", "Depth", "SizeFraction", "Season", "OxCond", "Fluorescence", "BeamAtt", "O2", "Temp", "Salinity", "H2S", "ParticulateS", "TZVS", "CH4", "NO3", "NO2", "NH4", "PO4", "Chemoautotrophy", "BNP", "MicroAbun(x10^8 L^-1)", "FlagAbun(x10^5 L-1)", "VLP(x10^8 L-1)")
replicate_metadata <- unique(select(metadata, all_of(replicate_metadata_cols)))
otu_table_mean_ra <- otu_table_mean_ra %>%
  left_join(replicate_metadata, by = "Replicate") %>%
  # Replace zeroes in RA with NA (better for plotting)
  mutate(Mean_RA = na_if(Mean_RA, 0))
otu_table_mean_ra
# Fix the level order of the categorical variables so they plot in this order
otu_table_mean_ra$OxCond <- factor(otu_table_mean_ra$OxCond, levels = c("Oxycline", "ShallowAnoxic", "Euxinic"))
otu_table_mean_ra$SizeFraction <- factor(otu_table_mean_ra$SizeFraction, levels = c("PA", "FL"))
# Bubble plot: depth on x, Supergroup/Division on y (ordered by summed mean
# relative abundance), bubble area = mean relative abundance, colour = redox
# condition, one panel per Season x SizeFraction.
# The former scale_size(range = c(1, 15)) call was removed: ggplot2 keeps only
# the last scale added for an aesthetic, so scale_size_area() always replaced
# it (with a "Scale for 'size' is already present" message).
euk_divisions_bubbleplot_color <- ggplot(
  otu_table_mean_ra,
  aes(
    x = as.character(Depth),
    y = reorder(SupergroupDivision, Mean_RA, function(x) sum(x, na.rm = TRUE)),
    color = OxCond
  )
) +
  geom_point(aes(size = Mean_RA)) +
  facet_wrap(Season ~ SizeFraction, scales = "free_x", drop = TRUE, ncol = 4) +
  scale_size_area(breaks = c(0, 0.25, 0.5, 0.75, 1), max_size = 6) +
  xlab("Depth") +
  ylab("") +
  labs(size = "Relative Abundance", color = "Redox Condition") +
  scale_color_manual(values = c("blue", "red", "brown4")) +
  theme_bw() +
  theme(
    axis.text = element_text(size = 8),
    axis.text.x = element_text(size = 8, angle = 45, hjust = 1),
    axis.title = element_text(size = 8),
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 8),
    strip.text = element_text(size = 8),
    legend.margin = margin(0, 0, 0, 2),
    legend.box.margin = margin(-10, -10, -10, -10),
    plot.margin = grid::unit(c(0, 0, 0, 0), "mm")
  )
euk_divisions_bubbleplot_color
Save figure
# Give every panel a fixed 22 x 100 mm size so figures are consistent,
# then write the result as a 180 x 125 mm EPS
euk_divisions_bubbleplot_color <- set_panel_size(
  euk_divisions_bubbleplot_color,
  width = unit(22, "mm"),
  height = unit(100, "mm")
)
ggsave(
  filename = "Figures/euk_divisions_bubbleplot_color.eps",
  plot = euk_divisions_bubbleplot_color,
  units = "mm", width = 180, height = 125, dpi = 300
)
Filter to only Alveolates; glom by order
# Keep only the taxa annotated to Supergroup "Alveolata"
supergroup_of_taxon <- as.data.frame(tax_table(ps_ra_mean))$Supergroup
keeptaxa <- taxa_names(ps_ra_mean)[supergroup_of_taxon %in% c("Alveolata")]
ps_ra_mean_alveolates <- prune_taxa(keeptaxa, ps_ra_mean)
# Collapse to one taxon per Order
ps_ra_mean_alveolate_orders <- tax_glom(ps_ra_mean_alveolates, "Order")
# Plain data frame with the replicate names restored as column names
alveolate_order_otu <- otu_table(ps_ra_mean_alveolate_orders)
aveloates_df <- data.frame(alveolate_order_otu)
colnames(aveloates_df) <- colnames(alveolate_order_otu)
# Expose the representative ASV id as a column and join the taxonomy on it
aveloates_df$ASV <- rownames(aveloates_df)
otu_table_mean_ra <- aveloates_df %>%
  left_join(as_tibble(taxonomy, rownames = "ASV"), by = "ASV")
otu_table_mean_ra
Some manual curating for plotting
# Add a "Division Class Order" label column (space-separated)
otu_table_mean_ra <- otu_table_mean_ra %>%
  mutate(Descriptive = paste(Division, Class, Order))
otu_table_mean_ra
Pivot longer
# One row per (taxon, replicate): gather the replicate columns into
# Replicate / Mean_RA pairs
replicate_cols <- unique(metadata$Replicate)
otu_table_mean_ra <- otu_table_mean_ra %>%
  pivot_longer(
    cols = all_of(replicate_cols),
    names_to = "Replicate",
    values_to = "Mean_RA"
  )
otu_table_mean_ra
Join metadata
# Per-replicate environmental metadata to attach to every row
replicate_metadata_cols <- c("Replicate", "Depth", "SizeFraction", "Season", "OxCond", "Fluorescence", "BeamAtt", "O2", "Temp", "Salinity", "H2S", "ParticulateS", "TZVS", "CH4", "NO3", "NO2", "NH4", "PO4", "Chemoautotrophy", "BNP", "MicroAbun(x10^8 L^-1)", "FlagAbun(x10^5 L-1)", "VLP(x10^8 L-1)")
replicate_metadata <- unique(select(metadata, all_of(replicate_metadata_cols)))
otu_table_mean_ra <- otu_table_mean_ra %>%
  left_join(replicate_metadata, by = "Replicate") %>%
  # Replace zeroes in RA with NA (better for plotting)
  mutate(Mean_RA = na_if(Mean_RA, 0))
otu_table_mean_ra
Shorten some labels to make space in plot
# Shorten long Descriptive labels so they fit on the plot's y axis.
# Only the Descriptive column is rewritten. The previous version compared
# every cell of the table (numeric columns included) against each label and
# assigned through a logical matrix.
descriptive_short_labels <- c(
  "Ciliophora Cyclotrichium_like_organism Cyclotrichium_like_organism_X" = "Cilio. Cyclotrichium_like Cyclotrichium_like",
  "Apicomplexa Gregarinomorphea Gregarines_GRE2" = "Apicom. Gregarinomorphea GRE2",
  "Apicomplexa Gregarinomorphea Eugregarinorida" = "Apicom. Gregarinomorphea Eugregarinorida",
  "Apicomplexa Coccidiomorphea Agamococcidiorida" = "Apicom. Coccidiomorphea Agamococcidiorida",
  "Dinoflagellata Ellobiophyceae Thalassomycetales" = "Dino. Ellobiophyceae Thalassomycetales",
  "Ciliophora Oligohymenophorea Scuticociliatia_1" = "Cilio. Oligohymenophorea Scuticociliatia_1",
  "Apicomplexa Apicomplexa_X Apicomplexa_XX" = "Apicom. Apicomplexa_X Apicomplexa_XX"
)
# Position of each row's label in the lookup (NA = leave the label as it is)
label_hit <- match(otu_table_mean_ra$Descriptive, names(descriptive_short_labels))
needs_short <- !is.na(label_hit)
otu_table_mean_ra$Descriptive[needs_short] <-
  unname(descriptive_short_labels[label_hit[needs_short]])
otu_table_mean_ra
# Fix the level order of the categorical variables so they plot in this order
otu_table_mean_ra$OxCond <- factor(otu_table_mean_ra$OxCond, levels = c("Oxycline", "ShallowAnoxic", "Euxinic"))
otu_table_mean_ra$SizeFraction <- factor(otu_table_mean_ra$SizeFraction, levels = c("PA", "FL"))
# Bubble plot: depth on x, Descriptive taxon label on y (ordered by summed
# mean relative abundance), bubble area = mean relative abundance, colour =
# redox condition, one panel per Season x SizeFraction.
# The former scale_size(range = c(1, 15)) call was removed: ggplot2 keeps only
# the last scale added for an aesthetic, so scale_size_area() always replaced
# it (with a "Scale for 'size' is already present" message).
alveolata_bubbleplot_color <- ggplot(
  otu_table_mean_ra,
  aes(
    x = as.character(Depth),
    y = reorder(Descriptive, Mean_RA, function(x) sum(x, na.rm = TRUE)),
    color = OxCond
  )
) +
  geom_point(aes(size = Mean_RA)) +
  facet_wrap(Season ~ SizeFraction, scales = "free_x", drop = TRUE, ncol = 4) +
  scale_size_area(breaks = c(0, 0.25, 0.5, 0.75, 1), max_size = 6) +
  xlab("Depth") +
  ylab("") +
  labs(size = "Relative Abundance", color = "Redox Condition") +
  scale_color_manual(values = c("blue", "red", "brown4")) +
  theme_bw() +
  theme(
    axis.text = element_text(size = 8),
    axis.text.x = element_text(size = 8, angle = 45, hjust = 1),
    axis.title = element_text(size = 8),
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 8),
    strip.text = element_text(size = 8),
    legend.margin = margin(0, 0, 0, 2),
    legend.box.margin = margin(-10, -10, -10, -10),
    plot.margin = grid::unit(c(0, 0, 0, 0), "mm")
  )
alveolata_bubbleplot_color
Save figure
# Give every panel a fixed 20 x 125 mm size so figures are consistent,
# then write the result as a 180 x 150 mm EPS
alveolata_bubbleplot_color <- set_panel_size(
  alveolata_bubbleplot_color,
  width = unit(20, "mm"),
  height = unit(125, "mm")
)
ggsave(
  filename = "Figures/alveolata_bubbleplot_color.eps",
  plot = alveolata_bubbleplot_color,
  units = "mm", width = 180, height = 150, dpi = 300
)
Filter to only Rhizaria; glom by order
# Keep only the taxa annotated to Supergroup "Rhizaria"
supergroup_of_taxon <- as.data.frame(tax_table(ps_ra_mean))$Supergroup
keeptaxa <- taxa_names(ps_ra_mean)[supergroup_of_taxon %in% c("Rhizaria")]
ps_ra_mean_rhizaria <- prune_taxa(keeptaxa, ps_ra_mean)
# Collapse to one taxon per Order
ps_ra_mean_rhizaria_orders <- tax_glom(ps_ra_mean_rhizaria, "Order")
# Plain data frame with the replicate names restored as column names
rhizaria_order_otu <- otu_table(ps_ra_mean_rhizaria_orders)
rhizaria_df <- data.frame(rhizaria_order_otu)
colnames(rhizaria_df) <- colnames(rhizaria_order_otu)
# Expose the representative ASV id as a column and join the taxonomy on it
rhizaria_df$ASV <- rownames(rhizaria_df)
otu_table_mean_ra <- rhizaria_df %>%
  left_join(as_tibble(taxonomy, rownames = "ASV"), by = "ASV")
otu_table_mean_ra
Some manual curating for plotting
# Add a "Division Class Order" label column (space-separated)
otu_table_mean_ra <- otu_table_mean_ra %>%
  mutate(Descriptive = paste(Division, Class, Order))
otu_table_mean_ra
Pivot longer
# One row per (taxon, replicate): gather the replicate columns into
# Replicate / Mean_RA pairs
replicate_cols <- unique(metadata$Replicate)
otu_table_mean_ra <- otu_table_mean_ra %>%
  pivot_longer(
    cols = all_of(replicate_cols),
    names_to = "Replicate",
    values_to = "Mean_RA"
  )
otu_table_mean_ra
Join metadata
# Per-replicate environmental metadata to attach to every row
replicate_metadata_cols <- c("Replicate", "Depth", "SizeFraction", "Season", "OxCond", "Fluorescence", "BeamAtt", "O2", "Temp", "Salinity", "H2S", "ParticulateS", "TZVS", "CH4", "NO3", "NO2", "NH4", "PO4", "Chemoautotrophy", "BNP", "MicroAbun(x10^8 L^-1)", "FlagAbun(x10^5 L-1)", "VLP(x10^8 L-1)")
replicate_metadata <- unique(select(metadata, all_of(replicate_metadata_cols)))
otu_table_mean_ra <- otu_table_mean_ra %>%
  left_join(replicate_metadata, by = "Replicate") %>%
  # Replace zeroes in RA with NA (better for plotting)
  mutate(Mean_RA = na_if(Mean_RA, 0))
otu_table_mean_ra
Shorten some labels to make space in plot
# Shorten long Descriptive labels so they fit on the plot's y axis.
# Only the Descriptive column is rewritten. The previous version compared
# every cell of the table (numeric columns included) against each label and
# assigned through a logical matrix.
# NOTE(review): the "Novel-clade" replacement contains Unicode minus signs
# (U+2212) rather than ASCII hyphens; kept as-is -- confirm it renders in EPS.
descriptive_short_labels <- c(
  "Radiolaria Acantharea Arthracanthida-Symphyacanthida" = "Radiolaria Acantharea A-S",
  "Cercozoa Chlorarachniophyceae Chlorarachniophyceae_X" = "Cercozoa Chlor. Chlor._X",
  "Cercozoa Filosa-Thecofilosea Filosa-Thecofilosea_X" = "Cercozoa F-T F-T_X",
  "Cercozoa Filosa-Granofilosea Filosa-Granofilosea_X" = "Cercozoa F-G. F-G._X",
  "Cercozoa Chlorarachniophyceae Chlorarachnida" = "Cercozoa Chlor. Chlorarachnida",
  "Cercozoa Filosa-Imbricatea Thaumatomonadida" = "Cercozoa F-I Thaumatomonadida",
  "Cercozoa Filosa-Imbricatea Filosa-Imbricatea_X" = "Cercozoa F-I F-I_X",
  "Cercozoa Endomyxa-Phytomyxea Phagomyxida" = "Cercozoa E-P Phagomyxida",
  "Cercozoa Filosa-Sarcomonadea Cercomonadida" = "Cercozoa F-S Cercomonadida",
  "Cercozoa Novel-clade-10-12 Novel-clade-12" = "Cercozoa N-C−10−12 N-C−12",
  "Cercozoa Filosa-Thecofilosea Ventricleftida" = "Cercozoa F-T Ventricleftida",
  "Cercozoa Filosa-Imbricatea Marimonadida" = "Cercozoa F-I Marimonadida",
  "Cercozoa Filosa-Thecofilosea Cryomonadida" = "Cercozoa F-T Cryomonadida",
  "Cercozoa Filosa-Sarcomonadea Glissomonadida" = "Cercozoa F-S Glissomonadida",
  "Cercozoa Filosa-Imbricatea Euglyphida" = "Cercozoa F-I Euglyphida"
)
# Position of each row's label in the lookup (NA = leave the label as it is)
label_hit <- match(otu_table_mean_ra$Descriptive, names(descriptive_short_labels))
needs_short <- !is.na(label_hit)
otu_table_mean_ra$Descriptive[needs_short] <-
  unname(descriptive_short_labels[label_hit[needs_short]])
otu_table_mean_ra
# Fix the level order of the categorical variables so they plot in this order
otu_table_mean_ra$OxCond <- factor(otu_table_mean_ra$OxCond, levels = c("Oxycline", "ShallowAnoxic", "Euxinic"))
otu_table_mean_ra$SizeFraction <- factor(otu_table_mean_ra$SizeFraction, levels = c("PA", "FL"))
# Bubble plot: depth on x, Descriptive taxon label on y (ordered by summed
# mean relative abundance), bubble area = mean relative abundance, colour =
# redox condition, one panel per Season x SizeFraction.
# The former scale_size(range = c(1, 15)) call was removed: ggplot2 keeps only
# the last scale added for an aesthetic, so scale_size_area() always replaced
# it (with a "Scale for 'size' is already present" message).
rhizaria_bubbleplot_color <- ggplot(
  otu_table_mean_ra,
  aes(
    x = as.character(Depth),
    y = reorder(Descriptive, Mean_RA, function(x) sum(x, na.rm = TRUE)),
    color = OxCond
  )
) +
  geom_point(aes(size = Mean_RA)) +
  facet_wrap(Season ~ SizeFraction, scales = "free_x", drop = TRUE, ncol = 4) +
  scale_size_area(breaks = c(0, 0.25, 0.5, 0.75, 1), max_size = 6) +
  xlab("Depth") +
  ylab("") +
  labs(size = "Relative Abundance", color = "Redox Condition") +
  scale_color_manual(values = c("blue", "red", "brown4")) +
  theme_bw() +
  theme(
    axis.text = element_text(size = 8),
    axis.text.x = element_text(size = 8, angle = 45, hjust = 1),
    axis.title = element_text(size = 8),
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 8),
    strip.text = element_text(size = 8),
    legend.margin = margin(0, 0, 0, 2),
    legend.box.margin = margin(-10, -10, -10, -10),
    plot.margin = grid::unit(c(0, 0, 0, 0), "mm")
  )
rhizaria_bubbleplot_color
Save figure
# Give every panel a fixed 20 x 100 mm size so figures are consistent,
# then write the result as a 180 x 125 mm EPS
rhizaria_bubbleplot_color <- set_panel_size(
  rhizaria_bubbleplot_color,
  width = unit(20, "mm"),
  height = unit(100, "mm")
)
ggsave(
  filename = "Figures/rhizaria_bubbleplot_color.eps",
  plot = rhizaria_bubbleplot_color,
  units = "mm", width = 180, height = 125, dpi = 300
)
Filter to only Opisthokonta; glom by order
# Keep only the taxa annotated to Supergroup "Opisthokonta"
supergroup_of_taxon <- as.data.frame(tax_table(ps_ra_mean))$Supergroup
keeptaxa <- taxa_names(ps_ra_mean)[supergroup_of_taxon %in% c("Opisthokonta")]
ps_ra_mean_opisthokonta <- prune_taxa(keeptaxa, ps_ra_mean)
# Collapse to one taxon per Order
ps_ra_mean_opisthokonta_orders <- tax_glom(ps_ra_mean_opisthokonta, "Order")
# Plain data frame with the replicate names restored as column names
opisthokonta_order_otu <- otu_table(ps_ra_mean_opisthokonta_orders)
opisthokonta_df <- data.frame(opisthokonta_order_otu)
colnames(opisthokonta_df) <- colnames(opisthokonta_order_otu)
# Expose the representative ASV id as a column and join the taxonomy on it
opisthokonta_df$ASV <- rownames(opisthokonta_df)
otu_table_mean_ra <- opisthokonta_df %>%
  left_join(as_tibble(taxonomy, rownames = "ASV"), by = "ASV")
otu_table_mean_ra
Some manual curating for plotting
# Add a "Division Class Order" label column (space-separated)
otu_table_mean_ra <- otu_table_mean_ra %>%
  mutate(Descriptive = paste(Division, Class, Order))
otu_table_mean_ra
Pivot longer
# One row per (taxon, replicate): gather the replicate columns into
# Replicate / Mean_RA pairs
replicate_cols <- unique(metadata$Replicate)
otu_table_mean_ra <- otu_table_mean_ra %>%
  pivot_longer(
    cols = all_of(replicate_cols),
    names_to = "Replicate",
    values_to = "Mean_RA"
  )
otu_table_mean_ra
Join metadata
# Per-replicate environmental metadata to attach to every row
replicate_metadata_cols <- c("Replicate", "Depth", "SizeFraction", "Season", "OxCond", "Fluorescence", "BeamAtt", "O2", "Temp", "Salinity", "H2S", "ParticulateS", "TZVS", "CH4", "NO3", "NO2", "NH4", "PO4", "Chemoautotrophy", "BNP", "MicroAbun(x10^8 L^-1)", "FlagAbun(x10^5 L-1)", "VLP(x10^8 L-1)")
replicate_metadata <- unique(select(metadata, all_of(replicate_metadata_cols)))
otu_table_mean_ra <- otu_table_mean_ra %>%
  left_join(replicate_metadata, by = "Replicate") %>%
  # Replace zeroes in RA with NA (better for plotting)
  mutate(Mean_RA = na_if(Mean_RA, 0))
otu_table_mean_ra
Shorten some labels to make space in plot
# Shorten long Descriptive labels so they fit on the plot's y axis.
# Only the Descriptive column is rewritten. The previous version compared
# every cell of the table (numeric columns included) against each label and
# assigned through a logical matrix.
descriptive_short_labels <- c(
  "Choanoflagellida Choanoflagellatea Acanthoecida" = "Choanof. Ch. Acanthoecida",
  "Choanoflagellida Choanoflagellatea Craspedida" = "Choanof. Ch. Craspedida",
  "Choanoflagellida Choanoflagellatea Choanoflagellatea_X" = "Choanof. Ch. Choanoflagellatea_X",
  "Choanoflagellida Choanoflagellida_X Choanoflagellida_XX" = "Choanof. Choanof._X Choanof._XX",
  "Mesomycetozoa Ichthyosporea Ichthyosphonida" = "Mesomy. Ichthyosporea Ichthyosphonida",
  "Opisthokonta_X Opisthokonta_XX Opisthokonta_XXX" = "Opis._X Opis._XX Opis._XXX"
)
# Position of each row's label in the lookup (NA = leave the label as it is)
label_hit <- match(otu_table_mean_ra$Descriptive, names(descriptive_short_labels))
needs_short <- !is.na(label_hit)
otu_table_mean_ra$Descriptive[needs_short] <-
  unname(descriptive_short_labels[label_hit[needs_short]])
otu_table_mean_ra
# Fix the factor level order so the legend and facets appear in the intended order
otu_table_mean_ra$OxCond <- factor(
  otu_table_mean_ra$OxCond,
  levels = c("Oxycline", "ShallowAnoxic", "Euxinic")
)
otu_table_mean_ra$SizeFraction <- factor(
  otu_table_mean_ra$SizeFraction,
  levels = c("PA", "FL")
)

# Bubble plot: depth (as discrete labels) on x, taxon label on y ordered by
# total Mean_RA, bubble area scaled by Mean_RA, colour by redox condition,
# one facet per Season x SizeFraction combination.
opithokonta_bubbleplot_color <- ggplot(
  otu_table_mean_ra,
  aes(
    x = as.character(Depth),
    y = reorder(Descriptive, Mean_RA, function(x) sum(x, na.rm = TRUE)),
    color = OxCond
  )
) +
  geom_point(aes(size = Mean_RA)) +
  facet_wrap(Season ~ SizeFraction, scales = "free_x", drop = TRUE, ncol = 4) +
  # A scale_size(range = c(1, 15)) call used to precede this line; ggplot2
  # keeps only the last scale added for an aesthetic, so it had no effect
  # other than emitting a "scale already present" message and was removed.
  scale_size_area(breaks = c(0, .1, .2, .3), max_size = 6) +
  xlab("Depth") +
  ylab("") +
  labs(size = "Relative Abundance", color = "Redox Condition") +
  scale_color_manual(values = c("blue", "red", "brown4")) +
  theme_bw() +
  theme(
    axis.text = element_text(size = 8),
    axis.text.x = element_text(size = 8, angle = 45, hjust = 1),
    axis.title = element_text(size = 8),
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 8),
    strip.text = element_text(size = 8),
    legend.margin = margin(0, 0, 0, 2),
    legend.box.margin = margin(-10, -10, -10, -10),
    plot.margin = grid::unit(c(0, 0, 0, 0), "mm")
  )
opithokonta_bubbleplot_color
Save figure
# Pin the panel dimensions (20 mm x 100 mm) so panels are the same size
# across all exported figures
opithokonta_bubbleplot_color <- set_panel_size(
  opithokonta_bubbleplot_color,
  width = unit(20, "mm"),
  height = unit(100, "mm")
)
# Write the fixed-size plot to EPS on a 180 mm x 125 mm canvas
ggsave(
  filename = "Figures/opithokonta_bubbleplot_color.eps",
  plot = opithokonta_bubbleplot_color,
  width = 180,
  height = 125,
  units = "mm",
  dpi = 300
)
Filter to only Stramenopiles; glom by class (more meaningful than Order in this case)
# Keep only the taxa whose Supergroup is Stramenopiles
keeptaxa <- taxa_names(ps_ra_mean)[
  as.data.frame(tax_table(ps_ra_mean))$Supergroup %in% "Stramenopiles"
]
ps_ra_mean_stramenopiles <- prune_taxa(keeptaxa, ps_ra_mean)

# Agglomerate the remaining taxa at the Class rank
ps_ra_mean_stramenopiles_classes <- tax_glom(
  ps_ra_mean_stramenopiles,
  taxrank = "Class"
)

# Copy the abundance table into a data frame, restore the column names from
# the otu_table, and expose the row names as an ASV key column
stramenopiles_df <- data.frame(otu_table(ps_ra_mean_stramenopiles_classes))
names(stramenopiles_df) <- colnames(otu_table(ps_ra_mean_stramenopiles_classes))
stramenopiles_df[["ASV"]] <- rownames(stramenopiles_df)

# Attach the taxonomy columns to each row via the ASV key
otu_table_mean_ra <- stramenopiles_df %>%
  left_join(as_tibble(taxonomy, rownames = "ASV"), by = "ASV")
otu_table_mean_ra
Some manual curation for plotting
# Build one human-readable label per row: "<Division> <Class>"
otu_table_mean_ra$Descriptive <- with(
  otu_table_mean_ra,
  paste(Division, Class)
)
otu_table_mean_ra
Pivot longer
# Reshape to long format: the replicate columns become Replicate / Mean_RA pairs
otu_table_mean_ra <- otu_table_mean_ra %>%
  pivot_longer(
    cols = unique(metadata$Replicate),
    names_to = "Replicate",
    values_to = "Mean_RA"
  )
otu_table_mean_ra
Join metadata
# Add the de-duplicated per-replicate metadata columns, matched on Replicate
otu_table_mean_ra <- otu_table_mean_ra %>%
  left_join(
    unique(select(
      metadata,
      c(
        "Replicate", "Depth", "SizeFraction", "Season", "OxCond",
        "Fluorescence", "BeamAtt", "O2", "Temp", "Salinity", "H2S",
        "ParticulateS", "TZVS", "CH4", "NO3", "NO2", "NH4", "PO4",
        "Chemoautotrophy", "BNP", "MicroAbun(x10^8 L^-1)",
        "FlagAbun(x10^5 L-1)", "VLP(x10^8 L-1)"
      )
    )),
    by = "Replicate"
  )
# Turn zero relative abundances into NA (better for plotting)
otu_table_mean_ra$Mean_RA <- replace(
  otu_table_mean_ra$Mean_RA,
  which(otu_table_mean_ra$Mean_RA == 0),
  NA
)
otu_table_mean_ra
Shorten some labels to make space in plot
# Shorten the long taxon labels so they fit on the plot axis.
# Only the Descriptive column is recoded: the labels are built there, and
# comparing the whole data frame (numeric and NA cells included) against a
# string is fragile and needlessly scans every column.
otu_table_mean_ra$Descriptive <- local({
  # full label -> abbreviated label
  # NOTE(review): most replacement labels use the Unicode minus sign (U+2212)
  # while "MAST-25" uses an ASCII hyphen; kept exactly as written -- confirm
  # which character is intended for the EPS output.
  short_labels <- c(
    "Stramenopiles_X Stramenopiles_XX" = "Strameno._X Strameno._XX",
    "Stramenopiles_X Stramenopiles_X-Group-7" = "Strameno._X Strameno._X−Group−7",
    "Stramenopiles_X MAST-21" = "Strameno._X MAST−21",
    "Stramenopiles_X MAST-25" = "Strameno._X MAST-25",
    "Stramenopiles_X Stramenopiles_X-Group-4" = "Strameno._X Strameno._X−Group−4",
    "Stramenopiles_X Stramenopiles_X-Group-6" = "Strameno._X Strameno._X−Group−6",
    "Stramenopiles_X Stramenopiles_X-Group-8" = "Strameno._X Strameno._X−Group−8"
  )
  labels <- as.character(otu_table_mean_ra$Descriptive)
  # %in% never yields NA, so missing labels are simply left untouched
  to_shorten <- labels %in% names(short_labels)
  labels[to_shorten] <- unname(short_labels[labels[to_shorten]])
  labels
})
otu_table_mean_ra
# Fix the factor level order so the legend and facets appear in the intended order
otu_table_mean_ra$OxCond <- factor(
  otu_table_mean_ra$OxCond,
  levels = c("Oxycline", "ShallowAnoxic", "Euxinic")
)
otu_table_mean_ra$SizeFraction <- factor(
  otu_table_mean_ra$SizeFraction,
  levels = c("PA", "FL")
)

# Bubble plot: depth (as discrete labels) on x, taxon label on y ordered by
# total Mean_RA, bubble area scaled by Mean_RA, colour by redox condition,
# one facet per Season x SizeFraction combination.
stramenopiles_bubbleplot_color <- ggplot(
  otu_table_mean_ra,
  aes(
    x = as.character(Depth),
    y = reorder(Descriptive, Mean_RA, function(x) sum(x, na.rm = TRUE)),
    color = OxCond
  )
) +
  geom_point(aes(size = Mean_RA)) +
  facet_wrap(Season ~ SizeFraction, scales = "free_x", drop = TRUE, ncol = 4) +
  # A scale_size(range = c(1, 15)) call used to precede this line; ggplot2
  # keeps only the last scale added for an aesthetic, so it had no effect
  # other than emitting a "scale already present" message and was removed.
  scale_size_area(breaks = c(0, .1, .2, .3), max_size = 6) +
  xlab("Depth") +
  ylab("") +
  labs(size = "Relative Abundance", color = "Redox Condition") +
  scale_color_manual(values = c("blue", "red", "brown4")) +
  theme_bw() +
  theme(
    axis.text = element_text(size = 8),
    axis.text.x = element_text(size = 8, angle = 45, hjust = 1),
    axis.title = element_text(size = 8),
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 8),
    strip.text = element_text(size = 8),
    legend.margin = margin(0, 0, 0, 2),
    legend.box.margin = margin(-10, -10, -10, -10),
    plot.margin = grid::unit(c(0, 0, 0, 0), "mm")
  )
stramenopiles_bubbleplot_color
Save figure
# Pin the panel dimensions (22 mm x 115 mm) so panels are the same size
# across all exported figures
stramenopiles_bubbleplot_color <- set_panel_size(
  stramenopiles_bubbleplot_color,
  width = unit(22, "mm"),
  height = unit(115, "mm")
)
# Write the fixed-size plot to EPS on a 180 mm x 150 mm canvas
ggsave(
  filename = "Figures/stramenopiles_bubbleplot_color.eps",
  plot = stramenopiles_bubbleplot_color,
  width = 180,
  height = 150,
  units = "mm",
  dpi = 300
)
# Shannon diversity per sample (vegan is called explicitly because
# diversity() is masked by other attached packages), transposed into a
# one-row matrix with one column per sample
shannons <- t(
  vegan::diversity(t(otu_table(ps)), index = "shannon")
)
shannons
AE3a103A AE3b103A AE1b900AM AE3a103B AE3b103B AE3a198B AE3b198B AE3a234B AE3b234B
[1,] 4.871221 4.956114 2.916447 4.192101 5.048457 5.352167 5.143548 5.169616 4.959116
AE3a295B AE3b295B AE3a314B AE3b198A AE3b314B AE3a900BM AE1b900BM AE2a143A
[1,] 2.736109 3.53949 2.780448 4.391812 3.143426 3.137984 2.137569 3.083671
AE2b143A AE2b200A AE2a237A AE2b237A AE2a247A AE3a234A AE2b247A AE2a267A AE2b267A
[1,] 4.690686 3.128682 4.191647 4.308389 2.398659 5.334367 2.36533 3.826925 3.929226
AE2a900AN AE2a143B AE2b143B AE2b200B AE2a237B AE3b234A AE2b237B AE2a247B AE2b247B
[1,] 3.047765 4.962882 3.019449 4.772924 2.413723 4.62931 3.37624 2.595961 2.714695
AE2b267B AE2b900BN AE3a295A AE3b295A AE3a314A AE3a900AM
[1,] 4.361093 4.492629 3.07776 2.638438 4.522401 3.592396
# Mean Shannon index per replicate group.
# The previous version chained 24 mutate() calls, each of which also re-spliced
# the entire sample table as an unnamed argument; the replicate -> samples
# mapping is now stated once and the means are computed from it.
shannons_mean <- local({
  # Replicate label -> sequenced samples belonging to it. Replicates listed
  # with a single sample lost their duplicate (see inline notes).
  replicate_samples <- list(
    "103A" = c("AE3a103A", "AE3b103A"),
    "198A" = c("AE3b198A"), # Sample AE3a198A was removed
    "234A" = c("AE3a234A", "AE3b234A"),
    "295A" = c("AE3a295A", "AE3b295A"),
    "314A" = c("AE3a314A"), # Sample AE3b314A was removed
    "900AM" = c("AE3a900AM", "AE1b900AM"),
    "103B" = c("AE3a103B", "AE3b103B"),
    "198B" = c("AE3a198B", "AE3b198B"),
    "234B" = c("AE3a234B", "AE3b234B"),
    "295B" = c("AE3a295B", "AE3b295B"),
    "314B" = c("AE3a314B", "AE3b314B"),
    "900BM" = c("AE3a900BM", "AE1b900BM"),
    "143A" = c("AE2a143A", "AE2b143A"),
    "200A" = c("AE2b200A"), # AE2a200A was removed
    "237A" = c("AE2a237A", "AE2b237A"),
    "247A" = c("AE2a247A", "AE2b247A"),
    "267A" = c("AE2a267A", "AE2b267A"),
    "900AN" = c("AE2a900AN"), # AE2b900AN was removed
    "143B" = c("AE2a143B", "AE2b143B"),
    "200B" = c("AE2b200B"), # AE2a200B was removed
    "237B" = c("AE2a237B", "AE2b237B"),
    "247B" = c("AE2a247B", "AE2b247B"),
    "267B" = c("AE2b267B"), # AE2a267B was removed
    "900BN" = c("AE2b900BN") # AE2a900BN was removed
  )
  # shannons is a one-row matrix; flatten it to a vector named by sample
  per_sample <- setNames(as.numeric(shannons), colnames(shannons))
  # Fail loudly (as select() did before) if an expected sample is missing
  stopifnot(all(unlist(replicate_samples) %in% names(per_sample)))
  replicate_means <- vapply(
    replicate_samples,
    function(samples) mean(per_sample[samples], na.rm = TRUE),
    numeric(1)
  )
  # One-row data frame, one column per replicate (names kept verbatim)
  as.data.frame(t(replicate_means))
})
# Order the columns to match the replicate order in the metadata
shannons_mean <- shannons_mean[, unique(metadata$Replicate), drop = FALSE]
shannons_mean
Get the standard deviation for each corresponding mean
# Standard deviation of the Shannon index per replicate group.
# The previous version called sd() on one-row data frames (which only works
# through list-to-double coercion) inside 24 chained mutate() calls; the
# replicate -> samples mapping is now stated once and sd() gets plain vectors.
shannons_sd <- local({
  # Replicate label -> sequenced samples belonging to it. Replicates listed
  # with a single sample lost their duplicate (see inline notes).
  replicate_samples <- list(
    "103A" = c("AE3a103A", "AE3b103A"),
    "198A" = c("AE3b198A"), # Sample AE3a198A was removed
    "234A" = c("AE3a234A", "AE3b234A"),
    "295A" = c("AE3a295A", "AE3b295A"),
    "314A" = c("AE3a314A"), # Sample AE3b314A was removed
    "900AM" = c("AE3a900AM", "AE1b900AM"),
    "103B" = c("AE3a103B", "AE3b103B"),
    "198B" = c("AE3a198B", "AE3b198B"),
    "234B" = c("AE3a234B", "AE3b234B"),
    "295B" = c("AE3a295B", "AE3b295B"),
    "314B" = c("AE3a314B", "AE3b314B"),
    "900BM" = c("AE3a900BM", "AE1b900BM"),
    "143A" = c("AE2a143A", "AE2b143A"),
    "200A" = c("AE2b200A"), # AE2a200A was removed
    "237A" = c("AE2a237A", "AE2b237A"),
    "247A" = c("AE2a247A", "AE2b247A"),
    "267A" = c("AE2a267A", "AE2b267A"),
    "900AN" = c("AE2a900AN"), # AE2b900AN was removed
    "143B" = c("AE2a143B", "AE2b143B"),
    "200B" = c("AE2b200B"), # AE2a200B was removed
    "237B" = c("AE2a237B", "AE2b237B"),
    "247B" = c("AE2a247B", "AE2b247B"),
    "267B" = c("AE2b267B"), # AE2a267B was removed
    "900BN" = c("AE2b900BN") # AE2a900BN was removed
  )
  # shannons is a one-row matrix; flatten it to a vector named by sample
  per_sample <- setNames(as.numeric(shannons), colnames(shannons))
  # Fail loudly (as select() did before) if an expected sample is missing
  stopifnot(all(unlist(replicate_samples) %in% names(per_sample)))
  replicate_sds <- vapply(
    replicate_samples,
    function(samples) stats::sd(per_sample[samples], na.rm = TRUE),
    numeric(1)
  )
  # One-row data frame, one column per replicate (names kept verbatim)
  as.data.frame(t(replicate_sds))
})
# Order the columns to match the replicate order in the metadata
shannons_sd <- shannons_sd[, unique(metadata$Replicate), drop = FALSE]
# sd() of a single value is NA; report those single-sample replicates as 0
shannons_sd[is.na(shannons_sd)] <- 0
shannons_sd
# Pivot both one-row tables to long format: one row per replicate
shannons_mean <- pivot_longer(
  shannons_mean,
  cols = unique(metadata$Replicate),
  names_to = "Replicate",
  values_to = "Shannons"
)
shannons_sd <- pivot_longer(
  shannons_sd,
  cols = unique(metadata$Replicate),
  names_to = "Replicate",
  values_to = "StDev"
)
# Attach the standard deviation to each mean. The key is stated explicitly so
# the join does not depend on (or announce) automatic column matching.
shannons_mean <- left_join(shannons_mean, shannons_sd, by = "Replicate")
Joining, by = "Replicate"
# Write the mean/SD table to CSV (used for a table in the manuscript)
write_csv(shannons_mean, "Figures/shannons.csv")
# Add the de-duplicated per-replicate metadata columns needed for plotting
shannons_mean <- shannons_mean %>%
  left_join(
    unique(select(
      metadata,
      c(
        "Replicate", "Depth", "SizeFraction", "Season", "OxCond",
        "Fluorescence", "BeamAtt", "O2", "Temp", "Salinity", "H2S",
        "ParticulateS", "TZVS", "CH4", "NO3", "NO2", "NH4", "PO4",
        "Chemoautotrophy", "BNP", "MicroAbun(x10^8 L^-1)",
        "FlagAbun(x10^5 L-1)", "VLP(x10^8 L-1)"
      )
    )),
    by = "Replicate"
  )
shannons_mean
# Fix the factor level order so the legend and facets appear in the intended order
shannons_mean$OxCond <- factor(
  shannons_mean$OxCond,
  levels = c("Oxycline", "ShallowAnoxic", "Euxinic")
)
shannons_mean$SizeFraction <- factor(
  shannons_mean$SizeFraction,
  levels = c("PA", "FL")
)

# Axis title with an italic H'
ytitle <- expression(paste("Shannon's Diversity Index (", italic("H'"), ")"))

# Depth profile of Shannon diversity: depth is mapped to x, reversed and then
# flipped so it runs down the vertical axis; a dotted black line joins the
# points, which are coloured by redox condition; one facet per
# Season x SizeFraction combination.
shannonsplot <- ggplot(
  shannons_mean,
  aes(x = Depth, y = Shannons, color = OxCond)
) +
  geom_line(size = 1, color = "black", lty = "dotted") +
  geom_point(size = 3, shape = 16) +
  facet_wrap(Season ~ SizeFraction, drop = TRUE, ncol = 4) +
  scale_x_reverse(expand = c(0, 0)) +
  scale_color_manual(values = c("blue", "red", "brown4")) +
  coord_flip(xlim = c(910, 100)) +
  labs(x = "Depth (m)", y = ytitle, color = "Redox Condition") +
  theme_bw() +
  theme(
    legend.position = "right",
    axis.text = element_text(size = 8),
    axis.text.x = element_text(size = 8),
    axis.title = element_text(size = 8),
    legend.title = element_text(size = 8),
    legend.text = element_text(size = 8),
    strip.text = element_text(size = 8),
    legend.margin = margin(0, 0, 0, 2)
  )
shannonsplot
Export Plot
# Pin the panel dimensions (22 mm x 60 mm) so panels are the same size
# across all exported figures
shannonsplot <- set_panel_size(
  shannonsplot,
  width = unit(22, "mm"),
  height = unit(60, "mm")
)
# Write the fixed-size plot to EPS on a 180 mm x 80 mm canvas
ggsave(
  filename = "Figures/shannonsplot.eps",
  plot = shannonsplot,
  width = 180,
  height = 80,
  units = "mm",
  dpi = 300
)
Following McMurdie and Holmes (2013), filter out taxa that were not seen with more than 3 counts in at least 20% of the samples. Also add a pseudocount of 1 to all counts, so that later calculations (log, division, etc.) do not fail because of zeroes.
# Keep taxa with more than 3 counts in more than 20% of the samples, prune the
# rest, then add a pseudocount of 1 to every count
ps_filtered <- ps %>%
  filter_taxa(function(x) sum(x > 3) > (0.2 * length(x)), prune = TRUE) %>%
  transform_sample_counts(function(x) x + 1)

# Relative-abundance object restricted to the same taxa (for plotting purposes)
ps_ra_filtered <- prune_taxa(taxa_names(ps_filtered), ps_ra)

# Print each object to compare the number of ASVs
ps
ps_filtered
ps_ra_filtered
Reduced from 13,427 to 979 ASVs
Based on the clustering tutorials by Coenen et al. (see their repository).
# Estimate covariance matrix for OTUs: multiply the filtered count table by its
# own transpose.
# NOTE(review): this is an uncentered, unscaled cross-product of the count
# table rather than a centered covariance estimate -- confirm that is intended.
covariance_matrix <- as.matrix(otu_table(ps_filtered)) %*% t(otu_table(ps_filtered))
# %*% = matrix multiplication sign in R; used here to multiply OTU/ASV data matrix to itself to estimate covariance.
# (the compositions package masks base %*%, so its version is the one dispatched here)
# Evaluate determinant of covariance matrix
cov_determinant <- det(covariance_matrix)
# Print the determinant
cov_determinant
The determinant of the covariance matrix (what we just calculated) equals the product of its eigenvalues, i.e. the product of the variances along every PCA axis. If the determinant is 0, at least one axis explains zero variance and cannot be separated from the other axes. This means the data need to be transformed to be suitable for PCA.
PCA is essentially a type of PCoA using the Euclidean distance matrix as input. When combined with a log-ratio transformation of the count table, this is deemed appropriate for compositional datasets.
First do a CLR, centered log ratio transformation of the absolute abundance data (after filtering), as suggested by Gloor et al. 2017 and check the determinant of this matrix. Compare it to the determinant without any transformation.
# Estimate covariance matrix for absolute abundance ASV table
# (same cross-product of the filtered count table with its transpose as above)
covariance_matrix <- as.matrix(otu_table(ps_filtered)) %*% t(otu_table(ps_filtered))
# Evaluate determinant of covariance matrix
cov_determinant <- det(covariance_matrix)
# Estimate covariance matrix for CLR-transformed ASV table
# The count table is transposed before the centered log-ratio transform and the
# result is stored as a data frame. compositions::clr is called explicitly by
# namespace.
clr_asv_table_ps_filtered <- data.frame(compositions::clr(t(otu_table(ps_filtered))))
## Check new determinant of clr transformed table
# NOTE(review): the CLR table is built from the transposed count table, so this
# product runs over the other dimension than the raw-count product above; the
# two determinants may therefore not be directly comparable -- confirm.
new_covdet <- det(as.matrix(clr_asv_table_ps_filtered) %*% t(clr_asv_table_ps_filtered))
# Compare
cov_determinant #Original Count Data
new_covdet # New
The determinant of the CLR-transformed table is not zero, so we can proceed with PCA of the CLR-transformed data.
Generate the PCA and visualize axes
# Run a Principal Component Analysis (PCA) on the CLR-transformed table.
lograt_pca <- prcomp(clr_asv_table_ps_filtered)
# NOTE- this is equivalent to first making a Euclidean distance matrix using the CLR data table and then running a PCoA. A Euclidean distance matrix of a log-transformed data table = an Aitchison distance matrix. So this is equivalent to the compositional methods listed in Gloor et al.

# Proportion of total variance carried by each axis, for the screeplot.
# Built directly as a two-column data frame (PCaxis, PercVar) instead of
# selecting a column by the deparsed text of the expression, which broke as
# soon as the expression's spelling changed. PCaxis holds the axis index as
# character and PercVar stays in column 2, as the later axis labels expect.
lograt_variances <- data.frame(
  PCaxis = as.character(seq_along(lograt_pca$sdev)),
  PercVar = lograt_pca$sdev^2 / sum(lograt_pca$sdev^2),
  stringsAsFactors = FALSE
)
head(lograt_variances)
# Plot screeplot: one bar per PC axis, in axis order.
# PercVar is stored as a proportion (0-1); it is multiplied by 100 here so the
# bars match the "% Variance" axis label.
ggplot(lograt_variances, aes(x = as.numeric(PCaxis), y = PercVar * 100)) +
  geom_bar(stat = "identity", fill = "grey", color = "black") +
  theme_minimal() +
  theme(
    axis.title = element_text(color = "black", face = "bold", size = 10),
    axis.text.y = element_text(color = "black", face = "bold"),
    axis.text.x = element_blank()
  ) +
  labs(x = "PC axis", y = "% Variance", title = "Log-Ratio PCA Screeplot, CLR Tranformation")
The first two axes explain a decent proportion of the variance: 24.8% + 13.4% = 38.2%.
Visualize the PCA
# Sample scores on every PC axis, with the row names exposed as "Sample Name"
pca_lograt_frame <- rownames_to_column(
  data.frame(lograt_pca$x),
  var = "Sample Name"
)
# Add the sample metadata and put the factor levels in plotting order
pca_lograt_frame <- pca_lograt_frame %>%
  left_join(metadata, by = "Sample Name") %>%
  mutate(
    SizeFraction = fct_relevel(SizeFraction, "PA", "FL"),
    OxCond = fct_relevel(OxCond, "Oxycline", "ShallowAnoxic", "Euxinic")
  )
pca_lograt_frame
# PCA scatter: PC1 vs PC2, colour = redox condition, shape = size fraction.
# Axis labels carry the share of variance of each axis, read from column 2 of
# lograt_variances and shown as a percentage.
pca_lograt_plot <- ggplot(
  pca_lograt_frame,
  aes(x = PC1, y = PC2, color = OxCond)
) +
  geom_point(aes(shape = SizeFraction), size = 4) +
  labs(
    x = paste0("PC1 ", round(lograt_variances[1, 2] * 100, 2), "%"),
    y = paste0("PC2 ", round(lograt_variances[2, 2] * 100, 2), "%"),
    title = "CLR-Euclidean PCA"
  ) +
  scale_color_manual(values = c("blue", "red", "brown4")) +
  coord_fixed(ratio = 1) +
  theme_bw()
pca_lograt_plot
Use vegan’s envfit to determine relationships between the ordination and environmental variables
# Keep only the metadata rows for samples present in ps_filtered, and put the
# factor levels in plotting order
metadata_ordinations <- metadata[
  metadata$`Sample Name` %in% sample_data(ps_filtered)$Sample.Name,
] %>%
  mutate(
    SizeFraction = fct_relevel(SizeFraction, "PA", "FL"),
    OxCond = fct_relevel(OxCond, "Oxycline", "ShallowAnoxic", "Euxinic")
  )

# Put the CLR table rows in the same sample order as the metadata, then
# re-run the PCA on the reordered table
clr_asv_table_ps_filtered <- clr_asv_table_ps_filtered[
  metadata_ordinations$"Sample Name",
]
lograt_pca <- prcomp(clr_asv_table_ps_filtered)

# Drop columns that don't make sense to test (e.g. NCBI sample IDs), repeated
# variables (e.g. Particulate S and TZVS), and those that didn't work on both
# cruises (fluorescence, beam attenuation, etc.)
metadata_ordinations <- select(
  metadata_ordinations,
  -Replicate, -Fluorescence, -BeamAtt, -TZVS, -Run, -"Assay Type",
  -AvgSpotLen, -Bases, -BioProject, -BioSample, -BioSampleModel, -Bytes,
  -"Center Name", -Collection_Date, -Consent, -"DATASTORE filetype",
  -"DATASTORE provider", -"DATASTORE region", -Experiment,
  -geo_loc_name_country, -geo_loc_name_country_continent, -geo_loc_name,
  -Instrument, -isolation_source, -lat_lon, -"Library Name", -LibraryLayout,
  -LibrarySelection, -LibrarySource, -Organism, -Platform, -ReleaseDate,
  -samp_collect_device, -"SRA Study", -Depth_m, -replicate, -size_fraction,
  -CH4_uM, -H2S_Um, -oxygen, -Oxygen_uM, -Particulate_Sulfur_uM, -salinity,
  -Temperature_degree_C, -TZVS_uM
)

# Shorter variable names that are easier to plot
metadata_ordinations <- metadata_ordinations %>%
  rename(
    PartS = ParticulateS,
    MicroAbun = "MicroAbun(x10^8 L^-1)",
    FlagAbun = "FlagAbun(x10^5 L-1)",
    VLPAbun = "VLP(x10^8 L-1)",
    Chemo = "Chemoautotrophy"
  )
# fit environmental factors and save stats output
# The seed is set once here; the permutation tests below draw from the same
# random stream in sequence, so their order affects the reported p-values.
set.seed(10010)
pca_envfit <- envfit(lograt_pca, metadata_ordinations, permutations = 1000)
# Save the printed fit to a text file, then print it
capture.output(pca_envfit, file = "stats_results/PCA_envfit_stat.txt")
pca_envfit
# significant vector variables, at the p<0.01 level: O2, Temp, Salinity, Particulate S, NO3, PO4, Chemoautotrophy, Flagellate Abundance
# significant centroid variables at the p<0.01 level: OxCond and SizeFraction
# fit species and save stats output
# Here every column of the CLR-transformed ASV table is fitted onto the PCA
pca_sppfit <- envfit(lograt_pca, clr_asv_table_ps_filtered, permutations = 1000)
# Save the printed fit to a text file, then print it
capture.output(pca_sppfit, file = "stats_results/PCA_sppfit_stat.txt")
pca_sppfit
Many of the typical variables that indicate redox condition are significant (O2, NO3, Particulate S, etc.), plus size fraction. There are also many species that are significant.
Make individual envfit objects for all the vectors that will be plotted
# vectors
# One single-variable envfit per environmental variable, using the formula
# interface (ordination ~ variable) with 1000 permutations each, so that each
# vector can later be drawn on its own.
# NOTE(review): no seed is set in this chunk; the permutation p-values depend
# on the random state left by the preceding code -- confirm this is acceptable.
pca_envfit_O2 <- envfit(lograt_pca~O2, metadata_ordinations, permutations = 1000)
pca_envfit_partS <- envfit(lograt_pca~PartS, metadata_ordinations, permutations = 1000)
pca_envfit_NO3 <- envfit(lograt_pca~NO3, metadata_ordinations, permutations = 1000)
pca_envfit_PO4 <- envfit(lograt_pca~PO4, metadata_ordinations, permutations = 1000)
pca_envfit_temp <- envfit(lograt_pca~Temp, metadata_ordinations, permutations = 1000)
pca_envfit_sal <- envfit(lograt_pca~Salinity, metadata_ordinations, permutations = 1000)
pca_envfit_chemo <- envfit(lograt_pca~Chemo, metadata_ordinations, permutations = 1000)
pca_envfit_FlagAbun <- envfit(lograt_pca~FlagAbun, metadata_ordinations, permutations = 1000)
Next, trim the sppfit vegan object to include only those species with an r2 value greater than 0.60. I got this function from an external source (linked in the original notebook). Later, when plotting, I can also trim by p-value.
#__FUNCTION: select.envfit__#
# Blank out the weakly fitted vectors of an envfit result so that only the
# strongly fitted ones are drawn when the object is plotted.
#
# Arguments:
#   fit      - result of envfit(); only fit$vectors$r and fit$vectors$arrows
#              are read or modified
#   r.select - numeric threshold; vectors whose fit$vectors$r is below it are
#              hidden
#
# Returns `fit` with the rows of fit$vectors$arrows that belong to
# sub-threshold vectors set to NA. The r values and p-values are left
# untouched, so a p-value cut-off can still be applied at plot time.
#
# NOTE(review): vegan documents the `r` component of a vector fit as the
# squared correlation coefficient, so r.select is presumably an r2 cut-off -
# confirm against the installed vegan version.
select.envfit <- function(fit, r.select) {
  # which() ignores NA comparisons and yields integer(0) when there are no
  # vectors at all, so empty or partially missing fits pass through unchanged
  # instead of raising an error.
  weak <- which(fit$vectors$r < r.select)
  if (length(weak) > 0) {
    fit$vectors$arrows[weak, ] <- NA
  }
  fit
}
# Hide species vectors whose fit statistic (fit$vectors$r) is below 0.6
pca_sppfit_trim <- select.envfit(fit = pca_sppfit, r.select = 0.6)
Complicated to plot vegan output in ggplot. Plot in base R
# Convert every character column of the metadata to a factor
metadata_ordinations <- metadata_ordinations %>%
  mutate(across(where(is.character), as.factor))
# Print the level order of the two grouping factors. The colour and shape
# vectors below are indexed by these factors in the plots, i.e. by level
# position, not by level name.
levels(metadata_ordinations$OxCond)
levels(metadata_ordinations$SizeFraction)
# Define colors and shapes for plot
# NOTE(review): element k of each vector is used for level k of OxCond /
# SizeFraction - check the printed level order against the order of the
# labels in the figure legend.
colvec <- c("blue", "red", "brown4")
shapevec <- c(16, 17)
# Draw the four-panel PCA figure (panels A-D) and its shared legend on the
# currently active graphics device. Called once for the notebook and once for
# the EPS file so that both outputs come from the same code.
#
# Reads from the calling environment: lograt_pca, lograt_variances,
# metadata_ordinations, colvec, shapevec, the pca_envfit_* objects and
# pca_sppfit_trim. Returns NULL invisibly. The graphical parameters changed
# here are restored when the function exits.
draw_pca_envfit_figure <- function() {
  site_scores <- scores(lograt_pca, display = "sites")
  pc1_label <- paste0('PC1 ', round(lograt_variances[1,2]*100,2),'%')
  pc2_label <- paste0('PC2 ', round(lograt_variances[2,2]*100,2),'%')

  # Base scatter shared by all panels: sites coloured by OxCond and shaped by
  # SizeFraction. Only the axis labels and axis visibility differ per panel.
  plot_sites <- function(xlab, ylab, xaxt = "s", yaxt = "s") {
    plot(site_scores,
         col = colvec[metadata_ordinations$OxCond],
         pch = shapevec[metadata_ordinations$SizeFraction],
         cex = 1.5, cex.lab = .8, cex.axis = .8,
         xlab = xlab, ylab = ylab, xaxt = xaxt, yaxt = yaxt,
         xlim = c(-60, 60))
  }

  # Overlay a list of single-variable envfit objects on the current panel
  add_env_vectors <- function(fits) {
    for (fit in fits) {
      plot(fit, p.max = 0.1, lwd = 2, col = "black", cex = 0.6)
    }
  }

  # Set up 2x2 panels (outer margin leaves room for the legend)
  panel_par <- par(oma = c(0, 0, 0, 1),
                   mfrow = c(2, 2),
                   mai = c(.65, .65, .1, 0))
  on.exit(par(panel_par), add = TRUE)

  # Panel 1 - first half of the envfit vectors
  plot_sites(xlab = "", ylab = pc2_label, xaxt = "n")
  add_env_vectors(list(pca_envfit_O2, pca_envfit_partS, pca_envfit_NO3, pca_envfit_PO4))
  title("A", line = -1, adj = 0.02)

  # Panel 2 - rest of the envfit vectors
  plot_sites(xlab = "", ylab = "", xaxt = "n", yaxt = "n")
  add_env_vectors(list(pca_envfit_temp, pca_envfit_sal, pca_envfit_FlagAbun, pca_envfit_chemo))
  title("B", line = -1, adj = 0.02)

  # Panel 3 - spider lines joining sites to their SizeFraction centroid
  plot_sites(xlab = pc1_label, ylab = pc2_label)
  ordispider(lograt_pca, metadata_ordinations$SizeFraction,
             lwd = 1.5, lty = c(1, 2), label = TRUE, cex = 0.6)
  title("C", line = -1, adj = 0.02)

  # Panel 4 - species vectors (already trimmed by fit statistic; p.max trims
  # by p-value)
  plot_sites(xlab = pc1_label, ylab = "", yaxt = "n")
  plot(pca_sppfit_trim, p.max = 0.001, col = "black", cex = 0.6)
  # annotate the five clusters of ASVs in panel D
  text(x = 0, y = 45, labels = "Cluster I", adj = 0.5, font = 2, cex = 0.8)
  text(x = 50, y = 30, labels = "Cluster II", adj = 0.5, font = 2, cex = 0.8)
  text(x = 48, y = -35, labels = "Cluster III", adj = 0.5, font = 2, cex = 0.8)
  text(x = -48, y = -18, labels = "Cluster IV", adj = 0.5, font = 2, cex = 0.8)
  text(x = -48, y = 10, labels = "Cluster V", adj = 0.5, font = 2, cex = 0.8)
  title("D", line = -1, adj = 0.02)

  # Add legend: leave the panel layout, switch to unit coordinates and allow
  # drawing outside the plot region
  par(panel_par)
  legend_par <- par(usr = c(0, 1, 0, 1), xpd = NA)
  on.exit(par(legend_par), add = TRUE)
  legend(-0.018, .57,
         c("PA", "FL", "Oxycline", "Shallow Anoxic", "Euxinic"),
         col = c("black", "black", "blue", "red", "brown4"),
         pch = c(16, 17, 15, 15, 15),
         box.col = NA, cex = .8, horiz = TRUE, x.intersp = c(0.3),
         text.width = c(0, 0.18, 0.18, 0.18, 0.2))

  invisible(NULL)
}

# Plot here in notebook
draw_pca_envfit_figure()

# Set up EPS and make the same plot in the file
setEPS(width = 6, height = 6)
postscript("Figures/PCA_envfit.eps")
draw_pca_envfit_figure()
dev.off()
For the manuscript, I want to discuss what these significant species are. Make a table:
# Permutation p-value of every species vector, keyed by ASV ID
fit_pvals <- pca_sppfit$vectors$pvals %>%
  as.data.frame() %>%
  rownames_to_column(var = "ASVID") %>%
  dplyr::rename(pvals = ".")

# Fit statistic (r2) of every species vector, keyed by ASV ID
fit_r2vals <- pca_sppfit$vectors$r %>%
  as.data.frame() %>%
  rownames_to_column(var = "ASVID") %>%
  dplyr::rename(r2vals = ".")

# Vector scores joined to both statistics; keep only species with
# p-val < 0.001 and r2 value > 0.6
fit_spp <- pca_sppfit %>%
  scores(display = "vectors") %>%
  as.data.frame() %>%
  rownames_to_column(var = "ASVID") %>%
  full_join(fit_pvals, by = "ASVID") %>%
  full_join(fit_r2vals, by = "ASVID") %>%
  filter(pvals < 0.001, r2vals > 0.6)
# --> filters to 107 species

# Attach the ASV identifying information (taxonomy) to the retained species and
# sort by PC2 to differentiate those above and below the PC2 = 0 axis
pca_sig_ASVs <- taxonomy %>%
  mutate(ASVID = rownames(taxonomy)) %>%
  right_join(fit_spp, by = "ASVID") %>%
  arrange(desc(PC2))
pca_sig_ASVs

# the vegan plot also scales the species scores to fit the current plot (which
# is why PC values don't match what is seen in plot). Get these scaling factors
ordiArrowMul(lograt_pca, display = "species") #7.636856
ordiArrowMul(pca_sppfit, display = "vectors") #0.8291121

# export as table
write.csv(pca_sig_ASVs, file = "stats_results/pca_sig_ASVs.csv", row.names = FALSE)
Import
# Read the raw archaeal and bacterial count tables
arch_counts <- read_csv(file = "Suter_2018_count_tables/Cariaco_AA_updated_raw.csv")
bac_counts <- read_csv(file = "Suter_2018_count_tables/Cariaco_AB_updated_raw.csv")
Get sample names
# Sample names are taken from fixed column positions of each count table
# (columns 2-49 for bacteria, 2-47 for archaea).
# NOTE(review): the positions are hard-coded - verify them if the input files
# change.
bac_samples <- names(bac_counts)[2:49]
arch_samples <- names(arch_counts)[2:47]
bac_samples
arch_samples
Make separate taxonomy and count variables
# Split each raw table into a count table (OTU ID + sample columns) and a
# taxonomy table (everything except the sample columns and helper columns).
# all_of() makes it explicit that the sample-name vectors are external
# character vectors rather than column names of the table itself.
arch_OTU <- arch_counts[, c("#OTU ID", arch_samples)]
arch_taxonomy <- arch_counts %>%
  select(-all_of(arch_samples), -Sum)
arch_OTU
arch_taxonomy
bac_OTU <- bac_counts[, c("#OTU ID", bac_samples)]
bac_taxonomy <- bac_counts %>%
  select(-all_of(bac_samples), -Sum, -"Interesting close relatives")
bac_OTU
bac_taxonomy

# Helper: turn a table into a plain data frame with re-guessed column types,
# move its "#OTU ID" column into the row names and drop that column.
# drop = FALSE keeps a data frame even if only one column remains.
otu_id_to_rownames <- function(tbl) {
  df <- type_convert(as.data.frame(tbl))
  rownames(df) <- df$`#OTU ID`
  df[, !names(df) %in% "#OTU ID", drop = FALSE]
}

# phyloseq components: count tables with taxa as rows, taxonomy as matrices
bac_OTU <- otu_table(otu_id_to_rownames(bac_OTU), taxa_are_rows = TRUE)
arch_OTU <- otu_table(otu_id_to_rownames(arch_OTU), taxa_are_rows = TRUE)
bac_TAX <- tax_table(as.matrix(otu_id_to_rownames(bac_taxonomy)))
arch_TAX <- tax_table(as.matrix(otu_id_to_rownames(arch_taxonomy)))

# Sample metadata, with rows named by the `Sample Name` column
META <- sample_data(data.frame(metadata, row.names = metadata$`Sample Name`))

# One phyloseq object per domain, sharing the same metadata
ps_bac <- phyloseq(bac_OTU, bac_TAX, META)
ps_arch <- phyloseq(arch_OTU, arch_TAX, META)
Filter out the samples with low sequencing effort. These were previously identified for itags paper
# Despite their names, taxa_to_keep_b / taxa_to_keep_a are logical masks over
# SAMPLES: TRUE for every sample that is not in the exclusion list.
low_effort_bac <- c("AB3a900A", "AB2a200A", "AB2b267A")
taxa_to_keep_b <- !(sample_names(ps_bac) %in% low_effort_bac)
ps_bac <- prune_samples(samples = taxa_to_keep_b, x = ps_bac)

low_effort_arch <- c("AA2b900AN", "AA2a247B", "AA2a900BN", "AA2b900BN")
taxa_to_keep_a <- !(sample_names(ps_arch) %in% low_effort_arch)
ps_arch <- prune_samples(samples = taxa_to_keep_a, x = ps_arch)
First calculate relative abundance of the bac and arch OTU tables
# Compositional transform of each domain, followed by a 5 x 5 corner of the
# transformed table as a quick visual check
ps_bac_ra <- microbiome::transform(ps_bac, transform = "compositional")
otu_table(ps_bac_ra)[1:5, 1:5]

ps_arch_ra <- microbiome::transform(ps_arch, transform = "compositional")
otu_table(ps_arch_ra)[1:5, 1:5]
Remove rows of glommed taxa from the full dataframe if their sum across all samples doesn’t exceed 5% (RA > 0.05)
# For each domain: sum every taxon over all samples of the relative-abundance
# object, keep taxa whose sum exceeds 0.05, and apply that same logical mask to
# both the relative-abundance and the count object. prune_taxa() is given the
# logical vector itself (not which() indices).

# Bacteria
x <- taxa_sums(ps_bac_ra)
keepTaxa <- x > 0.05
ps_bac_ra_pruned <- prune_taxa(taxa = keepTaxa, x = ps_bac_ra)
ps_bac_pruned <- prune_taxa(taxa = keepTaxa, x = ps_bac)
ps_bac_ra_pruned
ps_bac_pruned

# Archaea
x <- taxa_sums(ps_arch_ra)
keepTaxa <- x > 0.05
ps_arch_ra_pruned <- prune_taxa(taxa = keepTaxa, x = ps_arch_ra)
ps_arch_pruned <- prune_taxa(taxa = keepTaxa, x = ps_arch)
ps_arch_ra_pruned
ps_arch_pruned

# Eukaryotes
x <- taxa_sums(ps_ra)
keepTaxa <- x > 0.05
ps_euk_ra_pruned <- prune_taxa(taxa = keepTaxa, x = ps_ra)
ps_euk_pruned <- prune_taxa(taxa = keepTaxa, x = ps)
ps_euk_ra_pruned
ps_euk_pruned
Trimmed to 124 bacteria OTUs, 52 archaea OTUs, and 123 eukaryotic ASVs (299 total). Proceed with this dataset of the most abundant OTUs for correlations and network analyses…
To do the multi-domain analysis, the sample names from each phyloseq object must match. These currently have “B” for bacteria, A, E etc. Remove this letter from sample names so that “AE2a247B”, “AA2a247B”, “AB2a247B” all become just “Type” from the metadata sheet [IntNov1FL in this case- for Interface, November, rep 1, free-living].
Import my SampleKey
# Lookup table linking the per-domain sample IDs to the shared sample "Type"
samplekey <- read_csv(file = "SampleKey.csv")
Change the sample names in the otu tables to sample “Type”
# For each domain: keep the sample-key rows whose domain-specific ID is a
# column of the pruned table, order them like those columns, and use the
# matching Type values as the new sample names of both pruned objects.

# Archaea
samplekey_A <- samplekey %>%
  filter(SampleID_arch %in% colnames(otu_table(ps_arch_ra_pruned))) %>%
  arrange(factor(SampleID_arch, levels = colnames(otu_table(ps_arch_ra_pruned))))
sample_names(ps_arch_ra_pruned) <- samplekey_A$Type
sample_names(ps_arch_pruned) <- samplekey_A$Type

# Bacteria
samplekey_B <- samplekey %>%
  filter(SampleID_bac %in% colnames(otu_table(ps_bac_ra_pruned))) %>%
  arrange(factor(SampleID_bac, levels = colnames(otu_table(ps_bac_ra_pruned))))
sample_names(ps_bac_ra_pruned) <- samplekey_B$Type
sample_names(ps_bac_pruned) <- samplekey_B$Type

# Eukaryotes
samplekey_E <- samplekey %>%
  filter(SampleID_euk %in% colnames(otu_table(ps_euk_ra_pruned))) %>%
  arrange(factor(SampleID_euk, levels = colnames(otu_table(ps_euk_ra_pruned))))
sample_names(ps_euk_ra_pruned) <- samplekey_E$Type
sample_names(ps_euk_pruned) <- samplekey_E$Type
Move all pruned otu tables into one table by matching the sample Type; this will be used for SparCC. Make one for the 3-domain analysis and one for the 2-domain analysis (bacteria and archaea only).
# Stack the pruned count tables row-wise; columns (sample Types) are matched by
# name, so a sample missing from one domain becomes NA in that domain's rows.
# Row order: bacteria, then archaea, then eukaryotes.
alldomains_df <- bind_rows(
  data.frame(otu_table(ps_bac_pruned)),
  data.frame(otu_table(ps_arch_pruned)),
  data.frame(otu_table(ps_euk_pruned))
)
alldomains_df

# Same for bacteria + archaea only
twodomains_df <- bind_rows(
  data.frame(otu_table(ps_bac_pruned)),
  data.frame(otu_table(ps_arch_pruned))
)
twodomains_df
Change row names from “denovoXXX” to meaningful names
# Keep the original row names (OTU / ASV IDs) as an explicit ID column
alldomains_df_full <- cbind(ID = rownames(alldomains_df), alldomains_df)

# The rows of alldomains_df are stacked as bacteria, archaea, eukaryotes, so
# each domain occupies a contiguous block. seq_len() keeps a block empty when
# a domain has no taxa (1:n would wrongly pick up neighbouring rows).
n_bac <- nrow(otu_table(ps_bac_pruned))
n_arch <- nrow(otu_table(ps_arch_pruned))
n_euk <- nrow(otu_table(ps_euk_pruned))
bac_rows <- seq_len(n_bac)
arch_rows <- n_bac + seq_len(n_arch)
euk_rows <- n_bac + n_arch + seq_len(n_euk)

# bacteria: label = ID + taxonomy levels 2-4, then drop the joined taxonomy
# columns again (columns 2-11 of bac_taxonomy)
temp1 <- left_join(alldomains_df_full[bac_rows, ], bac_taxonomy, by = c("ID" = "#OTU ID"))
temp1$New_ID <- paste(temp1$ID, temp1$"taxonomy-2", temp1$"taxonomy-3", temp1$"taxonomy-4")
temp1 <- select(temp1, -all_of(colnames(bac_taxonomy[, 2:11])))

# archaea: label = ID + taxonomy levels 2-3 (taxonomy columns 2-9 dropped)
temp2 <- left_join(alldomains_df_full[arch_rows, ], arch_taxonomy, by = c("ID" = "#OTU ID"))
temp2$New_ID <- paste(temp2$ID, temp2$"taxonomy-2", temp2$"taxonomy-3")
temp2 <- select(temp2, -all_of(colnames(arch_taxonomy[, 2:9])))

# eukarya: label = ID + Supergroup, Division, Class, Order
euk_taxonomy <- cbind("#ASV ID" = rownames(taxonomy), taxonomy)
temp3 <- left_join(alldomains_df_full[euk_rows, ], euk_taxonomy, by = c("ID" = "#ASV ID"))
temp3$New_ID <- paste(temp3$ID, temp3$"Supergroup", temp3$"Division", temp3$"Class", temp3$"Order")
temp3 <- select(temp3, -all_of(colnames(euk_taxonomy[, 2:9])))

# combine back all 3 domains, with the new labels as row names
alldomains_df_full <- rbind(temp1, temp2, temp3)
alldomains_df_full <- data.frame(alldomains_df_full)
rownames(alldomains_df_full) <- alldomains_df_full$New_ID
alldomains_df_full <- select(alldomains_df_full, -c("ID", "New_ID"))

# and make one for the 2-domain dataset (bacteria + archaea rows only)
twodomains_df_full <- rbind(temp1, temp2)
twodomains_df_full <- data.frame(twodomains_df_full)
rownames(twodomains_df_full) <- twodomains_df_full$New_ID
twodomains_df_full <- select(twodomains_df_full, -c("ID", "New_ID"))
Remove columns with NAs. These are samples for which the library for at least one domain didn’t work (can’t do correlations with missing values in columns)
# Keep only the columns (samples) that have no missing value in any row
alldomains_df_full <- alldomains_df_full %>%
  select_if(function(column) all(!is.na(column)))
alldomains_df_full

alldomains_df <- alldomains_df %>%
  select_if(function(column) all(!is.na(column)))
alldomains_df

twodomains_df_full <- twodomains_df_full %>%
  select_if(function(column) all(!is.na(column)))
twodomains_df_full

twodomains_df <- twodomains_df %>%
  select_if(function(column) all(!is.na(column)))
twodomains_df
Similarly, make pruned datasets of the most abundant OTUs/ASVs in the oxycline, anoxic, and euxinic samples as separate datasets
Pull out samples and taxa from each redox regime
# For each domain: list the metadata sample names that are present in that
# domain's phyloseq object and have OxCond == "Oxycline", then subset both the
# count and the relative-abundance object to those samples.

# Bacteria
oxyclinetypes_bac <- metadata %>%
  filter(`Sample Name` %in% sample_names(ps_bac), OxCond == "Oxycline") %>%
  select("Sample Name") %>%
  unique() %>%
  unlist(use.names = FALSE)
ps_bac_oxycline <- prune_samples(samples = oxyclinetypes_bac, x = ps_bac)
ps_bac_ra_oxycline <- prune_samples(samples = oxyclinetypes_bac, x = ps_bac_ra)

# Archaea
oxyclinetypes_arch <- metadata %>%
  filter(`Sample Name` %in% sample_names(ps_arch), OxCond == "Oxycline") %>%
  select("Sample Name") %>%
  unique() %>%
  unlist(use.names = FALSE)
ps_arch_oxycline <- prune_samples(samples = oxyclinetypes_arch, x = ps_arch)
ps_arch_ra_oxycline <- prune_samples(samples = oxyclinetypes_arch, x = ps_arch_ra)

# Eukaryotes
oxyclinetypes_euk <- metadata %>%
  filter(`Sample Name` %in% sample_names(ps), OxCond == "Oxycline") %>%
  select("Sample Name") %>%
  unique() %>%
  unlist(use.names = FALSE)
ps_euk_oxycline <- prune_samples(samples = oxyclinetypes_euk, x = ps)
ps_euk_ra_oxycline <- prune_samples(samples = oxyclinetypes_euk, x = ps_ra)
Filter out low abundance taxa from the oxycline samples. Use 5% as cutoff
# Oxycline samples only: keep taxa whose relative abundance summed over the
# oxycline samples exceeds 0.05; the same logical mask prunes both the
# relative-abundance and the count object of each domain.

# Bacteria
x <- taxa_sums(ps_bac_ra_oxycline)
keepTaxa <- x > 0.05
ps_bac_ra_oxycline_pruned <- prune_taxa(taxa = keepTaxa, x = ps_bac_ra_oxycline)
ps_bac_oxycline_pruned <- prune_taxa(taxa = keepTaxa, x = ps_bac_oxycline)
ps_bac_ra_oxycline_pruned
ps_bac_oxycline_pruned

# Archaea
x <- taxa_sums(ps_arch_ra_oxycline)
keepTaxa <- x > 0.05
ps_arch_ra_oxycline_pruned <- prune_taxa(taxa = keepTaxa, x = ps_arch_ra_oxycline)
ps_arch_oxycline_pruned <- prune_taxa(taxa = keepTaxa, x = ps_arch_oxycline)
ps_arch_ra_oxycline_pruned
ps_arch_oxycline_pruned

# Eukaryotes
x <- taxa_sums(ps_euk_ra_oxycline)
keepTaxa <- x > 0.05
ps_euk_ra_oxycline_pruned <- prune_taxa(taxa = keepTaxa, x = ps_euk_ra_oxycline)
ps_euk_oxycline_pruned <- prune_taxa(taxa = keepTaxa, x = ps_euk_oxycline)
ps_euk_ra_oxycline_pruned
ps_euk_oxycline_pruned
79 bacteria, 36 archaea, 76 eukaryota remain
Change the sample names in the otu tables to “Type”
# For each domain: keep the sample-key rows whose domain-specific ID is a
# column of the pruned oxycline table, order them like those columns, and use
# the matching Type values as the new sample names of both pruned objects.

# Archaea
samplekey_A <- samplekey %>%
  filter(SampleID_arch %in% colnames(otu_table(ps_arch_ra_oxycline_pruned))) %>%
  arrange(factor(SampleID_arch, levels = colnames(otu_table(ps_arch_ra_oxycline_pruned))))
sample_names(ps_arch_ra_oxycline_pruned) <- samplekey_A$Type
sample_names(ps_arch_oxycline_pruned) <- samplekey_A$Type

# Bacteria
samplekey_B <- samplekey %>%
  filter(SampleID_bac %in% colnames(otu_table(ps_bac_ra_oxycline_pruned))) %>%
  arrange(factor(SampleID_bac, levels = colnames(otu_table(ps_bac_ra_oxycline_pruned))))
sample_names(ps_bac_ra_oxycline_pruned) <- samplekey_B$Type
sample_names(ps_bac_oxycline_pruned) <- samplekey_B$Type

# Eukaryotes
samplekey_E <- samplekey %>%
  filter(SampleID_euk %in% colnames(otu_table(ps_euk_ra_oxycline_pruned))) %>%
  arrange(factor(SampleID_euk, levels = colnames(otu_table(ps_euk_ra_oxycline_pruned))))
sample_names(ps_euk_ra_oxycline_pruned) <- samplekey_E$Type
sample_names(ps_euk_oxycline_pruned) <- samplekey_E$Type
Move all pruned otu tables into one table by matching the sample Type- will use this for SparCC
# Stack the pruned oxycline count tables row-wise (bacteria, archaea,
# eukaryotes); columns are matched by sample Type and filled with NA where a
# domain lacks a sample
alldomains_df_oxycline <- bind_rows(
  data.frame(otu_table(ps_bac_oxycline_pruned)),
  data.frame(otu_table(ps_arch_oxycline_pruned)),
  data.frame(otu_table(ps_euk_oxycline_pruned))
)
alldomains_df_oxycline
Change row names from “denovoXXX” to meaningful names
# Keep the original row names (OTU / ASV IDs) as an explicit ID column
alldomains_df_full_oxycline <- cbind(ID = rownames(alldomains_df_oxycline), alldomains_df_oxycline)

# Rows are stacked as bacteria, archaea, eukaryotes, so each domain occupies a
# contiguous block. seq_len() keeps a block empty when a domain has no taxa
# (1:n would wrongly pick up neighbouring rows).
n_bac <- nrow(otu_table(ps_bac_oxycline_pruned))
n_arch <- nrow(otu_table(ps_arch_oxycline_pruned))
n_euk <- nrow(otu_table(ps_euk_oxycline_pruned))
bac_rows <- seq_len(n_bac)
arch_rows <- n_bac + seq_len(n_arch)
euk_rows <- n_bac + n_arch + seq_len(n_euk)

# bacteria: label = ID + taxonomy levels 2-4, then drop the joined taxonomy
# columns again (columns 2-11 of bac_taxonomy)
temp1 <- left_join(alldomains_df_full_oxycline[bac_rows, ], bac_taxonomy, by = c("ID" = "#OTU ID"))
temp1$New_ID <- paste(temp1$ID, temp1$"taxonomy-2", temp1$"taxonomy-3", temp1$"taxonomy-4")
temp1 <- select(temp1, -all_of(colnames(bac_taxonomy[, 2:11])))

# archaea: label = ID + taxonomy levels 2-3 (taxonomy columns 2-9 dropped)
temp2 <- left_join(alldomains_df_full_oxycline[arch_rows, ], arch_taxonomy, by = c("ID" = "#OTU ID"))
temp2$New_ID <- paste(temp2$ID, temp2$"taxonomy-2", temp2$"taxonomy-3")
temp2 <- select(temp2, -all_of(colnames(arch_taxonomy[, 2:9])))

# eukarya: label = ID + Supergroup, Division, Class, Order
euk_taxonomy <- cbind("#ASV ID" = rownames(taxonomy), taxonomy)
temp3 <- left_join(alldomains_df_full_oxycline[euk_rows, ], euk_taxonomy, by = c("ID" = "#ASV ID"))
temp3$New_ID <- paste(temp3$ID, temp3$"Supergroup", temp3$"Division", temp3$"Class", temp3$"Order")
temp3 <- select(temp3, -all_of(colnames(euk_taxonomy[, 2:9])))

# combine back all 3 domains, with the new labels as row names
alldomains_df_full_oxycline <- rbind(temp1, temp2, temp3)
alldomains_df_full_oxycline <- data.frame(alldomains_df_full_oxycline)
rownames(alldomains_df_full_oxycline) <- alldomains_df_full_oxycline$New_ID
alldomains_df_full_oxycline <- select(alldomains_df_full_oxycline, -c("ID", "New_ID"))
alldomains_df_full_oxycline
Remove columns with NAs. These are samples for which the library for at least one domain didn’t work (can’t do correlations with missing values in columns)
# Keep only the columns (samples) that have no missing value in any row
alldomains_df_full_oxycline <- alldomains_df_full_oxycline %>%
  select_if(function(column) all(!is.na(column)))
alldomains_df_full_oxycline

alldomains_df_oxycline <- alldomains_df_oxycline %>%
  select_if(function(column) all(!is.na(column)))
alldomains_df_oxycline
21 samples remain for correlation
Pull out samples from shallow anoxic regime
# For each domain: list the metadata sample names that are present in that
# domain's phyloseq object and have OxCond == "ShallowAnoxic", then subset both
# the count and the relative-abundance object to those samples.

# Bacteria
anoxictypes_bac <- metadata %>%
  filter(`Sample Name` %in% sample_names(ps_bac), OxCond == "ShallowAnoxic") %>%
  select("Sample Name") %>%
  unique() %>%
  unlist(use.names = FALSE)
ps_bac_anoxic <- prune_samples(samples = anoxictypes_bac, x = ps_bac)
ps_bac_ra_anoxic <- prune_samples(samples = anoxictypes_bac, x = ps_bac_ra)

# Archaea
anoxictypes_arch <- metadata %>%
  filter(`Sample Name` %in% sample_names(ps_arch), OxCond == "ShallowAnoxic") %>%
  select("Sample Name") %>%
  unique() %>%
  unlist(use.names = FALSE)
ps_arch_anoxic <- prune_samples(samples = anoxictypes_arch, x = ps_arch)
ps_arch_ra_anoxic <- prune_samples(samples = anoxictypes_arch, x = ps_arch_ra)

# Eukaryotes
anoxictypes_euk <- metadata %>%
  filter(`Sample Name` %in% sample_names(ps), OxCond == "ShallowAnoxic") %>%
  select("Sample Name") %>%
  unique() %>%
  unlist(use.names = FALSE)
ps_euk_anoxic <- prune_samples(samples = anoxictypes_euk, x = ps)
ps_euk_ra_anoxic <- prune_samples(samples = anoxictypes_euk, x = ps_ra)
Filter out low abundance taxa from the shallow anoxic samples. Use 5% as cutoff
# Shallow anoxic samples only: keep taxa whose relative abundance summed over
# these samples exceeds 0.05; the same logical mask prunes both the
# relative-abundance and the count object of each domain.

# Bacteria
x <- taxa_sums(ps_bac_ra_anoxic)
keepTaxa <- x > 0.05
ps_bac_ra_anoxic_pruned <- prune_taxa(taxa = keepTaxa, x = ps_bac_ra_anoxic)
ps_bac_anoxic_pruned <- prune_taxa(taxa = keepTaxa, x = ps_bac_anoxic)
ps_bac_ra_anoxic_pruned
ps_bac_anoxic_pruned

# Archaea
x <- taxa_sums(ps_arch_ra_anoxic)
keepTaxa <- x > 0.05
ps_arch_ra_anoxic_pruned <- prune_taxa(taxa = keepTaxa, x = ps_arch_ra_anoxic)
ps_arch_anoxic_pruned <- prune_taxa(taxa = keepTaxa, x = ps_arch_anoxic)
ps_arch_ra_anoxic_pruned
ps_arch_anoxic_pruned

# Eukaryotes
x <- taxa_sums(ps_euk_ra_anoxic)
keepTaxa <- x > 0.05
ps_euk_ra_anoxic_pruned <- prune_taxa(taxa = keepTaxa, x = ps_euk_ra_anoxic)
ps_euk_anoxic_pruned <- prune_taxa(taxa = keepTaxa, x = ps_euk_anoxic)
ps_euk_ra_anoxic_pruned
ps_euk_anoxic_pruned
32 bacteria, 19 archaea, 37 eukaryota remain
Change the sample names in the otu tables to “Type”
# For each domain: keep the sample-key rows whose domain-specific ID is a
# column of the pruned shallow-anoxic table, order them like those columns, and
# use the matching Type values as the new sample names of both pruned objects.

# Archaea
samplekey_A <- samplekey %>%
  filter(SampleID_arch %in% colnames(otu_table(ps_arch_ra_anoxic_pruned))) %>%
  arrange(factor(SampleID_arch, levels = colnames(otu_table(ps_arch_ra_anoxic_pruned))))
sample_names(ps_arch_ra_anoxic_pruned) <- samplekey_A$Type
sample_names(ps_arch_anoxic_pruned) <- samplekey_A$Type

# Bacteria
samplekey_B <- samplekey %>%
  filter(SampleID_bac %in% colnames(otu_table(ps_bac_ra_anoxic_pruned))) %>%
  arrange(factor(SampleID_bac, levels = colnames(otu_table(ps_bac_ra_anoxic_pruned))))
sample_names(ps_bac_ra_anoxic_pruned) <- samplekey_B$Type
sample_names(ps_bac_anoxic_pruned) <- samplekey_B$Type

# Eukaryotes
samplekey_E <- samplekey %>%
  filter(SampleID_euk %in% colnames(otu_table(ps_euk_ra_anoxic_pruned))) %>%
  arrange(factor(SampleID_euk, levels = colnames(otu_table(ps_euk_ra_anoxic_pruned))))
sample_names(ps_euk_ra_anoxic_pruned) <- samplekey_E$Type
sample_names(ps_euk_anoxic_pruned) <- samplekey_E$Type
Move all pruned otu tables into one table by matching the sample Type- will use this for SparCC
# Stack the pruned shallow-anoxic count tables row-wise (bacteria, archaea,
# eukaryotes); columns are matched by sample Type and filled with NA where a
# domain lacks a sample
alldomains_df_anoxic <- bind_rows(
  data.frame(otu_table(ps_bac_anoxic_pruned)),
  data.frame(otu_table(ps_arch_anoxic_pruned)),
  data.frame(otu_table(ps_euk_anoxic_pruned))
)
alldomains_df_anoxic
Change row names from “denovoXXX” to meaningful names
# Keep the original row names (OTU / ASV IDs) as an explicit ID column
alldomains_df_full_anoxic <- cbind(ID = rownames(alldomains_df_anoxic), alldomains_df_anoxic)

# Rows are stacked as bacteria, archaea, eukaryotes, so each domain occupies a
# contiguous block. seq_len() keeps a block empty when a domain has no taxa
# (1:n would wrongly pick up neighbouring rows).
n_bac <- nrow(otu_table(ps_bac_anoxic_pruned))
n_arch <- nrow(otu_table(ps_arch_anoxic_pruned))
n_euk <- nrow(otu_table(ps_euk_anoxic_pruned))
bac_rows <- seq_len(n_bac)
arch_rows <- n_bac + seq_len(n_arch)
euk_rows <- n_bac + n_arch + seq_len(n_euk)

# bacteria: label = ID + taxonomy levels 2-4, then drop the joined taxonomy
# columns again (columns 2-11 of bac_taxonomy)
temp1 <- left_join(alldomains_df_full_anoxic[bac_rows, ], bac_taxonomy, by = c("ID" = "#OTU ID"))
temp1$New_ID <- paste(temp1$ID, temp1$"taxonomy-2", temp1$"taxonomy-3", temp1$"taxonomy-4")
temp1 <- select(temp1, -all_of(colnames(bac_taxonomy[, 2:11])))

# archaea: label = ID + taxonomy levels 2-3 (taxonomy columns 2-9 dropped)
temp2 <- left_join(alldomains_df_full_anoxic[arch_rows, ], arch_taxonomy, by = c("ID" = "#OTU ID"))
temp2$New_ID <- paste(temp2$ID, temp2$"taxonomy-2", temp2$"taxonomy-3")
temp2 <- select(temp2, -all_of(colnames(arch_taxonomy[, 2:9])))

# eukarya: label = ID + Supergroup, Division, Class, Order
euk_taxonomy <- cbind("#ASV ID" = rownames(taxonomy), taxonomy)
temp3 <- left_join(alldomains_df_full_anoxic[euk_rows, ], euk_taxonomy, by = c("ID" = "#ASV ID"))
temp3$New_ID <- paste(temp3$ID, temp3$"Supergroup", temp3$"Division", temp3$"Class", temp3$"Order")
temp3 <- select(temp3, -all_of(colnames(euk_taxonomy[, 2:9])))

# combine back all 3 domains, with the new labels as row names
alldomains_df_full_anoxic <- rbind(temp1, temp2, temp3)
alldomains_df_full_anoxic <- data.frame(alldomains_df_full_anoxic)
rownames(alldomains_df_full_anoxic) <- alldomains_df_full_anoxic$New_ID
alldomains_df_full_anoxic <- select(alldomains_df_full_anoxic, -c("ID", "New_ID"))
alldomains_df_full_anoxic
Remove columns with NAs. These are samples for which the library for at least one domain didn’t work (can’t do correlations with missing values in columns)
# Keep only the columns (samples) that have no missing value in any row
alldomains_df_full_anoxic <- alldomains_df_full_anoxic %>%
  select_if(function(column) all(!is.na(column)))
alldomains_df_full_anoxic

alldomains_df_anoxic <- alldomains_df_anoxic %>%
  select_if(function(column) all(!is.na(column)))
alldomains_df_anoxic
11 samples remain for correlation
Pull out samples from euxinic regime
# For each domain: list the metadata sample names that are present in that
# domain's phyloseq object and have OxCond == "Euxinic", then subset both the
# count and the relative-abundance object to those samples.

# Bacteria (euxinic layer)
euxinictypes_bac <- metadata %>%
  filter(`Sample Name` %in% sample_names(ps_bac), OxCond == "Euxinic") %>%
  select("Sample Name") %>%
  unique() %>%
  unlist(use.names = FALSE)
ps_bac_euxinic <- prune_samples(samples = euxinictypes_bac, x = ps_bac)
ps_bac_ra_euxinic <- prune_samples(samples = euxinictypes_bac, x = ps_bac_ra)

# Archaea (euxinic layer)
euxinictypes_arch <- metadata %>%
  filter(`Sample Name` %in% sample_names(ps_arch), OxCond == "Euxinic") %>%
  select("Sample Name") %>%
  unique() %>%
  unlist(use.names = FALSE)
ps_arch_euxinic <- prune_samples(samples = euxinictypes_arch, x = ps_arch)
ps_arch_ra_euxinic <- prune_samples(samples = euxinictypes_arch, x = ps_arch_ra)

# Eukaryotes (euxinic layer)
euxinictypes_euk <- metadata %>%
  filter(`Sample Name` %in% sample_names(ps), OxCond == "Euxinic") %>%
  select("Sample Name") %>%
  unique() %>%
  unlist(use.names = FALSE)
ps_euk_euxinic <- prune_samples(samples = euxinictypes_euk, x = ps)
ps_euk_ra_euxinic <- prune_samples(samples = euxinictypes_euk, x = ps_ra)
Filter out low abundance taxa from the euxinic samples. Use 5% as cutoff
# Euxinic samples only: keep taxa whose relative abundance summed over these
# samples exceeds 0.05; the same logical mask prunes both the
# relative-abundance and the count object of each domain.

# Bacteria
x <- taxa_sums(ps_bac_ra_euxinic)
keepTaxa <- x > 0.05
ps_bac_ra_euxinic_pruned <- prune_taxa(taxa = keepTaxa, x = ps_bac_ra_euxinic)
ps_bac_euxinic_pruned <- prune_taxa(taxa = keepTaxa, x = ps_bac_euxinic)
ps_bac_ra_euxinic_pruned
ps_bac_euxinic_pruned

# Archaea
x <- taxa_sums(ps_arch_ra_euxinic)
keepTaxa <- x > 0.05
ps_arch_ra_euxinic_pruned <- prune_taxa(taxa = keepTaxa, x = ps_arch_ra_euxinic)
ps_arch_euxinic_pruned <- prune_taxa(taxa = keepTaxa, x = ps_arch_euxinic)
ps_arch_ra_euxinic_pruned
ps_arch_euxinic_pruned

# Eukaryotes
x <- taxa_sums(ps_euk_ra_euxinic)
keepTaxa <- x > 0.05
ps_euk_ra_euxinic_pruned <- prune_taxa(taxa = keepTaxa, x = ps_euk_ra_euxinic)
ps_euk_euxinic_pruned <- prune_taxa(taxa = keepTaxa, x = ps_euk_euxinic)
ps_euk_ra_euxinic_pruned
ps_euk_euxinic_pruned
16 bacteria, 16 archaea, 20 eukaryota remain
Change the sample names in the otu tables to “Type”
# Relabel samples in each pruned euxinic object with the `Type` column of samplekey.
# For every domain: keep the key rows whose sample ID is a column of the pruned `_ra`
# OTU table, sort them into that column order, then assign Type as the new sample names
# of both the `_ra` and the count object. The guards stop with an explicit message if the
# key does not line up one-to-one with the columns, or if the two objects do not share
# the same sample order (which would otherwise mislabel the count object silently).

# Archaea
samplekey_A <- samplekey %>%
  filter(SampleID_arch %in% colnames(otu_table(ps_arch_ra_euxinic_pruned))) %>%
  arrange(factor(SampleID_arch, levels = colnames(otu_table(ps_arch_ra_euxinic_pruned))))
if (!identical(as.character(samplekey_A$SampleID_arch), colnames(otu_table(ps_arch_ra_euxinic_pruned)))) {
  stop("samplekey does not match the archaeal euxinic samples one-to-one", call. = FALSE)
}
if (!identical(sample_names(ps_arch_euxinic_pruned), sample_names(ps_arch_ra_euxinic_pruned))) {
  stop("archaeal count and RA objects differ in sample order", call. = FALSE)
}
sample_names(ps_arch_ra_euxinic_pruned) <- samplekey_A$Type
sample_names(ps_arch_euxinic_pruned) <- samplekey_A$Type

# Bacteria
samplekey_B <- samplekey %>%
  filter(SampleID_bac %in% colnames(otu_table(ps_bac_ra_euxinic_pruned))) %>%
  arrange(factor(SampleID_bac, levels = colnames(otu_table(ps_bac_ra_euxinic_pruned))))
if (!identical(as.character(samplekey_B$SampleID_bac), colnames(otu_table(ps_bac_ra_euxinic_pruned)))) {
  stop("samplekey does not match the bacterial euxinic samples one-to-one", call. = FALSE)
}
if (!identical(sample_names(ps_bac_euxinic_pruned), sample_names(ps_bac_ra_euxinic_pruned))) {
  stop("bacterial count and RA objects differ in sample order", call. = FALSE)
}
sample_names(ps_bac_ra_euxinic_pruned) <- samplekey_B$Type
sample_names(ps_bac_euxinic_pruned) <- samplekey_B$Type

# Eukaryotes
samplekey_E <- samplekey %>%
  filter(SampleID_euk %in% colnames(otu_table(ps_euk_ra_euxinic_pruned))) %>%
  arrange(factor(SampleID_euk, levels = colnames(otu_table(ps_euk_ra_euxinic_pruned))))
if (!identical(as.character(samplekey_E$SampleID_euk), colnames(otu_table(ps_euk_ra_euxinic_pruned)))) {
  stop("samplekey does not match the eukaryotic euxinic samples one-to-one", call. = FALSE)
}
if (!identical(sample_names(ps_euk_euxinic_pruned), sample_names(ps_euk_ra_euxinic_pruned))) {
  stop("eukaryotic count and RA objects differ in sample order", call. = FALSE)
}
sample_names(ps_euk_ra_euxinic_pruned) <- samplekey_E$Type
sample_names(ps_euk_euxinic_pruned) <- samplekey_E$Type
Move all pruned otu tables into one table by matching the sample Type- will use this for SparCC
# Stack the three pruned count tables row-wise (bacteria, then archaea, then eukaryotes).
# Columns are matched by name; a sample absent from a domain is filled with NA there.
alldomains_df_euxinic <- bind_rows(
  data.frame(otu_table(ps_bac_euxinic_pruned)),
  data.frame(otu_table(ps_arch_euxinic_pruned)),
  data.frame(otu_table(ps_euk_euxinic_pruned))
)
alldomains_df_euxinic
Change row names from “denovoXXX” to meaningful names
# Replace the row IDs of the combined euxinic table with "<ID> <taxonomy ...>" labels.
# The combined table is stacked as bacteria, archaea, eukaryotes, so each domain's rows
# are addressed by position using the row counts of the three pruned OTU tables.
alldomains_df_full_euxinic <- cbind(ID = rownames(alldomains_df_euxinic), alldomains_df_euxinic)
n_bac_euxinic <- nrow(otu_table(ps_bac_euxinic_pruned))
n_arch_euxinic <- nrow(otu_table(ps_arch_euxinic_pruned))
n_euk_euxinic <- nrow(otu_table(ps_euk_euxinic_pruned))

# Bacteria: first n_bac_euxinic rows; label = ID + taxonomy levels 2-4
temp1 <- left_join(
  alldomains_df_full_euxinic[seq_len(n_bac_euxinic), ],
  bac_taxonomy,
  by = c("ID" = "#OTU ID")
)
temp1$New_ID <- paste(temp1$ID, temp1$"taxonomy-2", temp1$"taxonomy-3", temp1$"taxonomy-4")
# drop the joined taxonomy columns again (columns 2-11 of bac_taxonomy)
temp1 <- select(temp1, -all_of(colnames(bac_taxonomy)[2:11]))

# Archaea: next n_arch_euxinic rows; label = ID + taxonomy levels 2-3
temp2 <- left_join(
  alldomains_df_full_euxinic[n_bac_euxinic + seq_len(n_arch_euxinic), ],
  arch_taxonomy,
  by = c("ID" = "#OTU ID")
)
temp2$New_ID <- paste(temp2$ID, temp2$"taxonomy-2", temp2$"taxonomy-3")
temp2 <- select(temp2, -all_of(colnames(arch_taxonomy)[2:9]))

# Eukaryotes: last n_euk_euxinic rows; the ASV IDs are the row names of `taxonomy`
euk_taxonomy <- cbind("#ASV ID" = rownames(taxonomy), taxonomy)
temp3 <- left_join(
  alldomains_df_full_euxinic[n_bac_euxinic + n_arch_euxinic + seq_len(n_euk_euxinic), ],
  euk_taxonomy,
  by = c("ID" = "#ASV ID")
)
temp3$New_ID <- paste(temp3$ID, temp3$"Supergroup", temp3$"Division", temp3$"Class", temp3$"Order")
temp3 <- select(temp3, -all_of(colnames(euk_taxonomy)[2:9]))

# Recombine the three domains and use the new labels as row names
alldomains_df_full_euxinic <- rbind(temp1, temp2, temp3)
alldomains_df_full_euxinic <- data.frame(alldomains_df_full_euxinic)
rownames(alldomains_df_full_euxinic) <- alldomains_df_full_euxinic$New_ID
alldomains_df_full_euxinic <- select(alldomains_df_full_euxinic, -c("ID", "New_ID"))
alldomains_df_full_euxinic
Remove columns with NAs. These are samples for which the library for at least one domain didn’t work (can’t do correlations with missing values in columns)
# Keep only the sample columns that contain no NA, in both the labelled and the raw table
alldomains_df_full_euxinic <- alldomains_df_full_euxinic %>%
  select(where(function(col) !anyNA(col)))
alldomains_df_full_euxinic
alldomains_df_euxinic <- alldomains_df_euxinic %>%
  select(where(function(col) !anyNA(col)))
alldomains_df_euxinic
4 samples remain for correlation
This is largely based on the BVCN tutorials. NOTE: the input for SparCC should be raw count data (after filtering out low-abundance ASVs). The function does a log-ratio transformation to account for compositionality.
# Helper functions from J. Cram https://biovcnet.github.io/_pages/NetworkScience_SparCC.nb
# pass(): returns its argument unchanged (a no-op step for pipelines)
pass <- function(x) x
# Keep the lower triangle (and diagonal) of a matrix; entries above the diagonal become NA
get_lower_tri <- function(cormat) {
  above_diagonal <- upper.tri(cormat)
  replace(cormat, above_diagonal, NA)
}
# Keep the upper triangle (and diagonal) of a matrix; entries below the diagonal become NA
get_upper_tri <- function(cormat) {
  below_diagonal <- lower.tri(cormat)
  replace(cormat, below_diagonal, NA)
}
# Reorder a correlation matrix by hierarchical clustering.
# cormat: square correlation matrix.
# Returns cormat with rows and columns permuted into the hclust() leaf order.
# The result is returned visibly (the previous version ended in an assignment, so the
# value was only returned invisibly).
reorder_cormat <- function(cormat) {
  # Use correlation between variables as distance: (1 - r) / 2
  dd <- as.dist((1 - cormat) / 2)
  hc <- hclust(dd)
  cormat[hc$order, hc$order]
}
# Reorder a correlation matrix and its matching p-value matrix with one shared ordering.
# The ordering is the hclust() leaf order on the distance (1 - r) / 2.
# Returns list(r = reordered cormat, p = reordered pmat).
reorder_cor_and_p <- function(cormat, pmat) {
  leaf_order <- hclust(as.dist((1 - cormat) / 2))$order
  list(
    r = cormat[leaf_order, leaf_order],
    p = pmat[leaf_order, leaf_order]
  )
}
# Run SparCC on the combined all-domain count table. alldomains_df is transposed first
# (presumably so samples are rows and taxa are columns, as sparcc() expects - confirm).
sparcctable_alldomains <- sparcc(t(alldomains_df))
Put taxon names back into result tables
# Label rows and columns of the correlation and covariance matrices with the row names
# of alldomains_df_full
dimnames(sparcctable_alldomains$Cor) <- list(rownames(alldomains_df_full), rownames(alldomains_df_full))
dimnames(sparcctable_alldomains$Cov) <- list(rownames(alldomains_df_full), rownames(alldomains_df_full))
# Quick check of the top-left corner
sparcctable_alldomains$Cor[1:2, 1:2]
Plot correlation
# Cluster-order the correlation matrix, keep the upper triangle, and reshape to long form
plotableSparcc <- sparcctable_alldomains$Cor %>%
  reorder_cormat() %>%
  get_upper_tri() %>%
  reshape2::melt() %>%
  na.omit()
# Heatmap of the SparCC correlations
Sparcc_plot <- ggplot(plotableSparcc, aes(x = Var2, y = Var1, fill = value)) +
  geom_tile() +
  scale_fill_gradient2() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
Sparcc_plot
# ggsave("figures/sparcc_corr_alldomains.eps",Sparcc_plot, width = 35, height = 35, units = c("in"))
Calculate Sparcc p-values by bootstrapping- TAKES A LONG TIME
# tp0 <- proc.time()
# out2 <- sparccboot(t(alldomains_df), R = 1000, ncpus = 2)
# tp1 <- proc.time()
# tp1 - tp0
The above took ~14 hours to run 1000 iterations
Extract p-values
# Bootstrap p-values from the sparccboot result `out2`
outP <- pval.sparccboot(out2)
head(data.frame(outP$cors, outP$pvals))
cors <- outP$cors
pvals <- outP$pvals

# Rebuild full symmetric matrices from the upper-triangle vectors. The diagonal starts
# at 0.5 so that adding the transpose yields 1 on the diagonal.
sparCCpcors <- diag(
  0.5,
  nrow = nrow(sparcctable_alldomains$Cor),
  ncol = nrow(sparcctable_alldomains$Cor)
)
sparCCpcors[upper.tri(sparCCpcors, diag = FALSE)] <- cors
sparCCpcors <- sparCCpcors + t(sparCCpcors)

sparCCpval <- diag(
  0.5,
  nrow = nrow(sparcctable_alldomains$Cor),
  ncol = nrow(sparcctable_alldomains$Cor)
)
sparCCpval[upper.tri(sparCCpval, diag = FALSE)] <- pvals
sparCCpval <- sparCCpval + t(sparCCpval)

# Label both matrices with the row names of alldomains_df_full
dimnames(sparCCpcors) <- list(rownames(alldomains_df_full), rownames(alldomains_df_full))
dimnames(sparCCpval) <- list(rownames(alldomains_df_full), rownames(alldomains_df_full))
sparCCpcors[1:2, 1:2]
sparCCpval[1:2, 1:2]
Reorder for plotting
# Apply one shared clustering order to the correlation and p-value matrices
reordered_all_sparcc <- reorder_cor_and_p(sparCCpcors, sparCCpval)
reordered_sparccCor <- reordered_all_sparcc$r
reordered_sparccP <- reordered_all_sparcc$p
# Long-format upper triangles: one row per (Var1, Var2) pair
sparccCor_processed <- reordered_sparccCor %>%
  get_upper_tri() %>%
  reshape2::melt() %>%
  na.omit() %>%
  rename(cor = value)
sparccP_processed <- reordered_sparccP %>%
  get_upper_tri() %>%
  reshape2::melt() %>%
  na.omit() %>%
  rename(p = value)
# join the two data frames
# NOTE(review): the self-correlation filter below is disabled, so the diagonal entries
# (p = 1) are included in the set of p-values passed to p.adjust() - confirm intended.
SparccP <- left_join(sparccCor_processed, sparccP_processed, by = c("Var1", "Var2")) %>%
  # # remove self correlations
  # filter(Var1 != Var2) %>%
  # calculate the false discovery rate to adjust for multiple p values
  mutate(fdr = p.adjust(p, method = "BH"))
And plot correlation with p-values. Circles mark relationships that are significant at the FDR < 0.01 level (Benjamini-Hochberg-adjusted bootstrap p-values)
# FDR threshold below which a pair is marked on the heatmap
fdrThresh <- 0.01
sparccOkP <- filter(SparccP, fdr < fdrThresh)
# Correlation heatmap; open circles (shape 1) mark the pairs in sparccOkP
SparccP_plot <- ggplot(SparccP, aes(x = Var2, y = Var1, fill = cor)) +
  geom_tile() +
  scale_fill_gradient2() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  geom_point(data = sparccOkP, shape = 1)
SparccP_plot
ggsave("figures/sparcc_corr_alldomains_w_pvals.eps", SparccP_plot, width = 35, height = 35, units = "in")
Save environment again
# save.image("EnvironmentBackups/CariacoEuks_postanalysis_vars_upto_sparcc_bootstrap.RData")
Or load if coming back
# Restore the workspace image written by the (commented-out) save.image() call above;
# objects in the current session with the same names are overwritten.
load("EnvironmentBackups/CariacoEuks_postanalysis_vars_upto_sparcc_bootstrap.RData")
Try the SpiecEasi method, which accounts for sparse data, as described in the SpiecEasi publication, the SpiecEasi GitHub page, and BVCN lesson 1.2. This reduces the clumps (e.g. sparse relationships that are secondary or tertiary, not direct relationships).
Make functions from tutorial
# Convert a SpiecEasi fit into a long table of pairwise edge weights for heatmap plotting.
#
# se_out:   object accepted by getOptCov() and getRefit().
# sp.names: character vector of taxon labels, in the same order as the fitted variables.
#
# Returns a data frame with columns Var1, Var2 (all label pairs from expand.grid) and
# Weight: the correlation (cov2cor of the optimal covariance) for pairs that are edges
# in the refit network, 0 for pairs that are not, and NA for pairs where the index of
# Var1 is greater than the index of Var2.
convertSEToTable <- function(se_out, sp.names) {
  # Correlations from the optimal covariance, masked by the refit adjacency
  secor <- cov2cor(as.matrix(getOptCov(se_out)))
  # summary() of the sparse upper triangle gives one row (i, j, x) per retained edge.
  # Matrix::triu is named explicitly: summary() must receive a sparse Matrix here, and
  # an unqualified triu() can resolve to a different function depending on attach order.
  elist <- summary(Matrix::triu(secor * getRefit(se_out), k = 1))
  elist[, 1] <- sp.names[elist[, 1]]
  elist[, 2] <- sp.names[elist[, 2]]
  # Pair key "name_i name_j", used to look the edge up in the full grid below
  elist[, 4] <- paste(elist[, 1], elist[, 2])
  full_e <- expand.grid(sp.names, sp.names)
  rownames(full_e) <- paste(full_e[, 1], full_e[, 2])
  full_e[, "Weight"] <- 0
  full_e[elist[, 4], "Weight"] <- elist[, 3]
  # Blank out the pairs where the first index exceeds the second
  x <- expand.grid(seq_along(sp.names), seq_along(sp.names))
  full_e[x[, "Var1"] > x[, "Var2"], "Weight"] <- NA
  as.data.frame(full_e, stringsAsFactors = FALSE)
}
Follow the spieceasi documentation to find optimal parameters. Also, because I want to compare networks, this convo on using optimal parameters for different network comparisons is helpful.
Remove samples from the phyloseq objects that are not in all 3 domains and reorder samples so they are in same order in all 3 objects
# Samples present in all three pruned objects
bac_arch_common <- intersect(
  sample_names(ps_bac_ra_pruned),
  sample_names(ps_arch_ra_pruned)
)
all_common <- intersect(bac_arch_common, sample_names(ps_euk_ra_pruned))
# Restrict the count and `_ra` objects of every domain to the shared samples
ps_bac_pruned_3domains <- prune_samples(all_common, ps_bac_pruned)
ps_arch_pruned_3domains <- prune_samples(all_common, ps_arch_pruned)
ps_euk_pruned_3domains <- prune_samples(all_common, ps_euk_pruned)
ps_bac_ra_pruned_3domains <- prune_samples(all_common, ps_bac_ra_pruned)
ps_arch_ra_pruned_3domains <- prune_samples(all_common, ps_arch_ra_pruned)
ps_euk_ra_pruned_3domains <- prune_samples(all_common, ps_euk_ra_pruned)
# Put the archaeal and eukaryotic count tables into the bacterial sample order
otu_table(ps_arch_pruned_3domains) <-
  otu_table(ps_arch_pruned_3domains)[, sample_names(ps_bac_ra_pruned_3domains)]
otu_table(ps_euk_pruned_3domains) <-
  otu_table(ps_euk_pruned_3domains)[, sample_names(ps_bac_ra_pruned_3domains)]
# Run SpiecEasi (glasso) on the three-domain count data; fixed seed passed to pulsar
pargs <- list(seed = 10010)
se <- spiec.easi(
  list(ps_bac_pruned_3domains, ps_arch_pruned_3domains, ps_euk_pruned_3domains),
  method = "glasso",
  lambda.min.ratio = 1e-2,
  nlambda = 100,
  pulsar.params = pargs
)
getStability(se)
the above takes a while to run (20-30 mins). Using parameters above, the stability along the lambda path (in se$select$stars$summary) crosses the 0.05 threshold and the final stability value (0.044) is sufficiently close to 0.05
#This is just a fancy helper function to get the data in a comparable format to the output of above
# Long table of edge weights, labelled with the row names of alldomains_df_full
tab.se <- convertSEToTable(se, sp.names = rownames(alldomains_df_full))
# Heatmap of the SpiecEasi weights
plot.se <- ggplot(tab.se, aes(x = Var1, y = Var2, fill = Weight)) +
  geom_tile() +
  scale_fill_gradient2() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
plot(plot.se)
ggsave("figures/spieceasi_alldomains.eps", plot.se, width = 35, height = 35, units = "in")
Note- only the significant values above show up in the heatmap above (ie. there is no “p-value”)
Remove samples from the phyloseq objects that are not in both domains and reorder samples so they are in the same order in both objects
# Two-domain (bacteria + archaea) SpiecEasi run.
# Samples present in both the bacterial and the archaeal pruned objects
bac_arch_common <- intersect(sample_names(ps_bac_ra_pruned), sample_names(ps_arch_ra_pruned))
ps_bac_pruned_2domains <- prune_samples(bac_arch_common, ps_bac_pruned)
ps_arch_pruned_2domains <- prune_samples(bac_arch_common, ps_arch_pruned)
ps_bac_ra_pruned_2domains <- prune_samples(bac_arch_common, ps_bac_ra_pruned)
ps_arch_ra_pruned_2domains <- prune_samples(bac_arch_common, ps_arch_ra_pruned)
# Put the archaeal count table into the sample order of the two-domain bacterial object.
# This previously indexed by the sample names of ps_bac_ra_pruned_3domains, which
# restricted archaea to the three-domain sample set while bacteria kept the full
# bacteria/archaea intersection.
# NOTE(review): if the two sample sets differ, results of this run change after the fix.
otu_table(ps_arch_pruned_2domains) <-
  otu_table(ps_arch_pruned_2domains)[, sample_names(ps_bac_pruned_2domains)]
sample_data(ps_bac_pruned_2domains)
sample_data(ps_arch_pruned_2domains)
# Run SpiecEasi (glasso); fixed seed passed to pulsar
pargs <- list(seed = 10010)
se.2domains <- spiec.easi(
  list(ps_bac_pruned_2domains, ps_arch_pruned_2domains),
  method = "glasso",
  lambda.min.ratio = 1e-2,
  nlambda = 200,
  pulsar.params = pargs
)
getStability(se.2domains)
the above takes a while to run . Using parameters above, the stability along the lambda path crosses the 0.05 threshold and the final stability value (0.046) is close to 0.05
#This is just a fancy helper function to get the data in a comparable format to the output of above
# Long table of edge weights, labelled with the row names of twodomains_df_full
tab.se <- convertSEToTable(se.2domains, sp.names = rownames(twodomains_df_full))
# Heatmap of the two-domain SpiecEasi weights
plot.se <- ggplot(tab.se, aes(x = Var1, y = Var2, fill = Weight)) +
  geom_tile() +
  scale_fill_gradient2() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
plot(plot.se)
ggsave("figures/spieceasi_2domains.eps", plot.se, width = 35, height = 35, units = "in")
Note- only the significant values above show up in the heatmap above (ie. there is no “p-value”)
# Samples present in all three oxycline objects
bac_arch_common <- intersect(
  sample_names(ps_bac_oxycline_pruned),
  sample_names(ps_arch_oxycline_pruned)
)
all_common <- intersect(bac_arch_common, sample_names(ps_euk_oxycline_pruned))
# Restrict each object to the shared samples (the objects are overwritten in place)
ps_bac_oxycline_pruned <- prune_samples(all_common, ps_bac_oxycline_pruned)
ps_arch_oxycline_pruned <- prune_samples(all_common, ps_arch_oxycline_pruned)
ps_euk_oxycline_pruned <- prune_samples(all_common, ps_euk_oxycline_pruned)
# Put the archaeal and eukaryotic tables into the bacterial sample order
otu_table(ps_arch_oxycline_pruned) <-
  otu_table(ps_arch_oxycline_pruned)[, sample_names(ps_bac_oxycline_pruned)]
otu_table(ps_euk_oxycline_pruned) <-
  otu_table(ps_euk_oxycline_pruned)[, sample_names(ps_bac_oxycline_pruned)]
sample_data(ps_bac_oxycline_pruned)
sample_data(ps_arch_oxycline_pruned)
sample_data(ps_euk_oxycline_pruned)
# Run SpiecEasi (glasso); fixed seed passed to pulsar
pargs <- list(seed = 10010)
se.oxycline <- spiec.easi(
  list(ps_bac_oxycline_pruned, ps_arch_oxycline_pruned, ps_euk_oxycline_pruned),
  method = "glasso",
  lambda.min.ratio = 5e-3,
  nlambda = 300,
  pulsar.params = pargs
)
getStability(se.oxycline)
the above takes a couple of minutes to run. Stability and stability along lambda path are very similar to the full dataset spieceasi object (se) with these parameters above. Continue with these.
#This is just a fancy helper function to get the data in a comparable format to the output of above
# Long table of edge weights, labelled with the row names of alldomains_df_full_oxycline
tab.se.oxycline <- convertSEToTable(se.oxycline, sp.names = rownames(alldomains_df_full_oxycline))
# Heatmap of the oxycline SpiecEasi weights
plot.se.oxycline <- ggplot(tab.se.oxycline, aes(x = Var1, y = Var2, fill = Weight)) +
  geom_tile() +
  scale_fill_gradient2() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
plot(plot.se.oxycline)
ggsave("figures/spieceasi_alldomains_oxycline.eps", plot.se.oxycline, width = 35, height = 35, units = "in")
# Samples present in all three anoxic objects
bac_arch_common <- intersect(
  sample_names(ps_bac_anoxic_pruned),
  sample_names(ps_arch_anoxic_pruned)
)
all_common <- intersect(bac_arch_common, sample_names(ps_euk_anoxic_pruned))
# Restrict each object to the shared samples (the objects are overwritten in place)
ps_bac_anoxic_pruned <- prune_samples(all_common, ps_bac_anoxic_pruned)
ps_arch_anoxic_pruned <- prune_samples(all_common, ps_arch_anoxic_pruned)
ps_euk_anoxic_pruned <- prune_samples(all_common, ps_euk_anoxic_pruned)
# Put the archaeal and eukaryotic tables into the bacterial sample order
otu_table(ps_arch_anoxic_pruned) <-
  otu_table(ps_arch_anoxic_pruned)[, sample_names(ps_bac_anoxic_pruned)]
otu_table(ps_euk_anoxic_pruned) <-
  otu_table(ps_euk_anoxic_pruned)[, sample_names(ps_bac_anoxic_pruned)]
sample_data(ps_bac_anoxic_pruned)
sample_data(ps_arch_anoxic_pruned)
sample_data(ps_euk_anoxic_pruned)
# Run SpiecEasi (glasso); fixed seed passed to pulsar
pargs <- list(seed = 10010)
se.anoxic <- spiec.easi(
  list(ps_bac_anoxic_pruned, ps_arch_anoxic_pruned, ps_euk_anoxic_pruned),
  method = "glasso",
  lambda.min.ratio = 1e-1,
  nlambda = 300,
  pulsar.params = pargs
)
getStability(se.anoxic)
the above takes a couple of minutes to run
#This is just a fancy helper function to get the data in a comparable format to the output of above
# Long table of edge weights, labelled with the row names of alldomains_df_full_anoxic
tab.se.anoxic <- convertSEToTable(se.anoxic, sp.names = rownames(alldomains_df_full_anoxic))
# Heatmap of the anoxic SpiecEasi weights
plot.se.anoxic <- ggplot(tab.se.anoxic, aes(x = Var1, y = Var2, fill = Weight)) +
  geom_tile() +
  scale_fill_gradient2() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
plot(plot.se.anoxic)
ggsave("figures/spieceasi_alldomains_anoxic.eps", plot.se.anoxic, width = 35, height = 35, units = "in")
# Samples present in all three euxinic objects
bac_arch_common <- intersect(
  sample_names(ps_bac_euxinic_pruned),
  sample_names(ps_arch_euxinic_pruned)
)
all_common <- intersect(bac_arch_common, sample_names(ps_euk_euxinic_pruned))
# Restrict each object to the shared samples (the objects are overwritten in place)
ps_bac_euxinic_pruned <- prune_samples(all_common, ps_bac_euxinic_pruned)
ps_arch_euxinic_pruned <- prune_samples(all_common, ps_arch_euxinic_pruned)
ps_euk_euxinic_pruned <- prune_samples(all_common, ps_euk_euxinic_pruned)
# Put the archaeal and eukaryotic tables into the bacterial sample order
otu_table(ps_arch_euxinic_pruned) <-
  otu_table(ps_arch_euxinic_pruned)[, sample_names(ps_bac_euxinic_pruned)]
otu_table(ps_euk_euxinic_pruned) <-
  otu_table(ps_euk_euxinic_pruned)[, sample_names(ps_bac_euxinic_pruned)]
sample_data(ps_bac_euxinic_pruned)
sample_data(ps_arch_euxinic_pruned)
sample_data(ps_euk_euxinic_pruned)
# Run SpiecEasi (glasso); fixed seed passed to pulsar
pargs <- list(seed = 10010)
se.euxinic <- spiec.easi(
  list(ps_bac_euxinic_pruned, ps_arch_euxinic_pruned, ps_euk_euxinic_pruned),
  method = "glasso",
  lambda.min.ratio = 1e-5,
  nlambda = 20,
  pulsar.params = pargs
)
getStability(se.euxinic)
I tried many parameters on the above but cannot get a satisfactory solution. There are just too few samples after quality filtering to do SpiecEasi on the euxinic depths only.
# save.image("EnvironmentBackups/CariacoEuks_postanalysis_vars_upto_spieceasi.RData")
Or load if coming back
# Restore the workspace image written by the (commented-out) save.image() call above;
# objects in the current session with the same names are overwritten.
load("EnvironmentBackups/CariacoEuks_postanalysis_vars_upto_spieceasi.RData")
Build networks from the SpiecEasi association matrices using iGraph
# Extract the refit adjacency matrix from the three-domain SpiecEasi fit `se`
adj.mat <- getRefit(se)
# Tabulate its entries (printed below as counts of 0s and 1s)
table(as.numeric(adj.mat))
0 1
83721 5680
# Weighted adjacency: correlations from the optimal covariance, masked by the refit network
se.cor <- cov2cor(as.matrix(getOptCov(se)))
weighted.adj.mat <- se.cor * getRefit(se)
# Graph objects from the unweighted and the weighted matrices
grph.unweighted <- adj2igraph(adj.mat)
grph <- adj2igraph(weighted.adj.mat)
# Label vertices with the row names of the combined table
V(grph)$name <- rownames(alldomains_df)
# V(grph)
# Node size follows degree; the +1 avoids size-zero vertices, the /2 scales nodes down
V(grph)$size <- (degree(grph) + 1) / 2
# Edge width proportional to the absolute edge weight
E(grph)$width <- abs(E(grph)$weight) * 10
# Remove low-weight edges (you decide what threshold is right for your network):
# weight_threshold <- 0.07
# grph <- delete.edges(grph,which(abs(E(grph)$weight)<weight_threshold))

# Attach taxonomy to every node.
# Split the graph into its vertex and edge data frames
grph_df <- igraph::as_data_frame(grph, "both")
# Taxonomy table per domain, with the taxon ID in a `name` column for joining
ps_bac_pruned_tax_table <- as.data.frame(tax_table(ps_bac_pruned)) %>%
  mutate(name = rownames(tax_table(ps_bac_pruned)))
ps_arch_pruned_tax_table <- as.data.frame(tax_table(ps_arch_pruned)) %>%
  mutate(name = rownames(tax_table(ps_arch_pruned)))
ps_euk_pruned_tax_table <- as.data.frame(tax_table(ps_euk_pruned)) %>%
  mutate(name = rownames(tax_table(ps_euk_pruned)))
# Vertices are stacked as bacteria, archaea, eukaryotes; address each domain by position
n_bac <- ntaxa(ps_bac_pruned)
n_arch <- ntaxa(ps_arch_pruned)
n_euk <- ntaxa(ps_euk_pruned)
bac_temp <- left_join(grph_df$vertices[seq_len(n_bac), ], ps_bac_pruned_tax_table, by = "name")
# drop the bacterial columns that the other taxonomy tables do not have
bac_temp <- select(bac_temp, -"taxonomy-9", -"Refined taxonomy")
arch_temp <- left_join(grph_df$vertices[n_bac + seq_len(n_arch), ], ps_arch_pruned_tax_table, by = "name")
euk_temp <- left_join(grph_df$vertices[n_bac + n_arch + seq_len(n_euk), ], ps_euk_pruned_tax_table, by = "name")
# rename the eukaryotic rank columns to the taxonomy-N names used by the other domains
euk_temp <- rename(
  euk_temp,
  "taxonomy-1" = Kingdom,
  "taxonomy-2" = Supergroup,
  "taxonomy-3" = Division,
  "taxonomy-4" = Class,
  "taxonomy-5" = Order,
  "taxonomy-6" = Family,
  "taxonomy-7" = Genus,
  "taxonomy-8" = Species
)
# Full vertex table for all 3 domains
all_temp <- rbind(bac_temp, arch_temp, euk_temp)
# Rebuild the graph so vertices carry the taxonomy attributes
grph <- graph_from_data_frame(grph_df$edges, directed = FALSE, vertices = all_temp)
# Colour palette for domain; indexed by the factor level position of "taxonomy-1"
dtype <- c("red", "green", "blue", "yellow")
domain_color <- dtype[as.numeric(as.factor(V(grph)$"taxonomy-1"))]
# check
domain_color
[1] "green" "green" "green" "green" "green" "green" "green" "green" "green" "green" "green" "green"
[13] "green" "green" "green" "green" "green" "green" "green" "green" "green" "green" "green" "green"
[25] "green" "green" "green" "green" "green" "green" "green" "green" "green" "green" "green" "green"
[37] "green" "green" "green" "green" "green" "green" "green" "green" "green" "green" "green" "green"
[49] "green" "green" "green" "green" "green" "green" "green" "green" "green" "green" "green" "green"
[61] "green" "green" "green" "green" "green" "green" "green" "green" "green" "green" "green" "green"
[73] "green" "green" "green" "green" "green" "green" "green" "green" "green" "green" "green" "green"
[85] "green" "green" "green" "green" "green" "green" "green" "green" "green" "green" "yellow" "green"
[97] "green" "green" "green" "green" "green" "green" "green" "green" "green" "green" "green" "green"
[109] "green" "green" "green" "green" "green" "green" "green" "green" "green" "green" "green" "green"
[121] "green" "green" "green" "green" "red" "red" "red" "red" "red" "red" "red" "red"
[133] "red" "red" "red" "red" "red" "red" "red" "red" "red" "red" "red" "red"
[145] "red" "red" "red" "red" "red" "red" "red" "red" "red" "red" "red" "red"
[157] "red" "red" "red" "red" "red" "red" "red" "red" "red" "red" "red" "red"
[169] "red" "red" "red" "red" "red" "red" "red" "red" "blue" "blue" "blue" "blue"
[181] "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue"
[193] "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue"
[205] "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue"
[217] "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue"
[229] "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue"
[241] "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue"
[253] "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue"
[265] "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue"
[277] "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue"
[289] "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue" "blue"
as.factor(V(grph)$"taxonomy-1")
[1] Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria
[9] Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria
[17] Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria
[25] Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria
[33] Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria
[41] Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria
[49] Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria
[57] Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria
[65] Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria
[73] Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria
[81] Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria
[89] Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria No blast hit Bacteria
[97] Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria
[105] Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria
[113] Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria Bacteria
[121] Bacteria Bacteria Bacteria Bacteria Archaea Archaea Archaea Archaea
[129] Archaea Archaea Archaea Archaea Archaea Archaea Archaea Archaea
[137] Archaea Archaea Archaea Archaea Archaea Archaea Archaea Archaea
[145] Archaea Archaea Archaea Archaea Archaea Archaea Archaea Archaea
[153] Archaea Archaea Archaea Archaea Archaea Archaea Archaea Archaea
[161] Archaea Archaea Archaea Archaea Archaea Archaea Archaea Archaea
[169] Archaea Archaea Archaea Archaea Archaea Archaea Archaea Archaea
[177] Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
[185] Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
[193] Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
[201] Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
[209] Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
[217] Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
[225] Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
[233] Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
[241] Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
[249] Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
[257] Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
[265] Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
[273] Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
[281] Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
[289] Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota Eukaryota
[297] Eukaryota Eukaryota Eukaryota
Levels: Archaea Bacteria Eukaryota No blast hit
# Draw the all-domain network with title and domain legend on the current device.
# A new graphopt layout is computed on every call.
draw_alldomains_network <- function() {
  plot(
    grph,
    vertex.label = NA,
    layout = layout_with_graphopt(grph),
    vertex.color = domain_color
  )
  title("SpiecEasi Network: All domains, Whole Water Column")
  legend(
    "topright",
    bty = "n",
    legend = c("Archaea", "Bacteria", "Eukarya", "No Blast Hit"),
    fill = c("red", "green", "blue", "yellow"),
    border = NA
  )
}
# Plot on screen
draw_alldomains_network()
# Save the same plot as EPS
# NOTE(review): this path uses "Figures/" while the ggsave() calls use "figures/" -
# confirm which directory exists on case-sensitive file systems.
setEPS()
postscript(file = "Figures/3domains_alldepths_spieceasi_network.eps", width = 5.5, height = 5)
draw_alldomains_network()
dev.off()
quartz_off_screen
2
# Split the all-domain network into positive-edge and negative-edge subgraphs and plot both.
grph.pos <- delete.edges(grph, which(E(grph)$weight < 0))
grph.neg <- delete.edges(grph, which(E(grph)$weight > 0))
# For each subsetted graph, remove those nodes that are no longer connected to anything
grph.pos <- delete.vertices(grph.pos, which(degree(grph.pos) == 0))
grph.neg <- delete.vertices(grph.neg, which(degree(grph.neg) == 0))
# Colour each vertex by the value of "taxonomy-1", looked up by name. Indexing the
# palette by factor level position would shift every colour whenever one domain has no
# vertices left in a subgraph, and the legend below would then be wrong.
domain_palette <- c(
  "Archaea" = "red",
  "Bacteria" = "green",
  "Eukaryota" = "blue",
  "No blast hit" = "yellow"
)
domain_color_pos <- unname(domain_palette[as.character(V(grph.pos)$"taxonomy-1")])
domain_color_neg <- unname(domain_palette[as.character(V(grph.neg)$"taxonomy-1")])
# Plot pos
plot(grph.pos,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.pos),
  vertex.color = domain_color_pos
)
title("SpiecEasi Network: All domains, Positive Edges, Whole Water Column")
legend("topright",
  bty = "n",
  legend = c("Archaea", "Bacteria", "Eukarya", "No Blast Hit"),
  fill = c("red", "green", "blue", "yellow"), border = NA
)
# Plot neg
plot(grph.neg,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.neg),
  vertex.color = domain_color_neg
)
title("SpiecEasi Network: All domains, Negative Edges, Whole Water Column")
legend("topright",
  bty = "n",
  legend = c("Archaea", "Bacteria", "Eukarya", "No Blast Hit"),
  fill = c("red", "green", "blue", "yellow"), border = NA
)
# Save plots
# Positive-edge network as EPS. The legend matches the vertex colour vector (Archaea red,
# Bacteria green, Eukarya blue, No Blast Hit yellow); it previously labelled Bacteria as
# red and Archaea as green and left out the "No Blast Hit" entry.
setEPS()
postscript(file = "Figures/3domains_alldepths_posedges_spieceasi_network.eps", width = 5.5, height = 5)
plot(grph.pos,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.pos),
  vertex.color = domain_color_pos
)
title("SpiecEasi Network: All domains, Positive Edges, Whole Water Column")
legend("topright",
  bty = "n",
  legend = c("Archaea", "Bacteria", "Eukarya", "No Blast Hit"),
  fill = c("red", "green", "blue", "yellow"), border = NA
)
dev.off()
quartz_off_screen
2
# Negative-edge network as EPS
setEPS()
postscript(
  file = "Figures/3domains_alldepths_negedges_spieceasi_network.eps",
  width = 5.5,
  height = 5
)
plot(
  grph.neg,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.neg),
  vertex.color = domain_color_neg
)
title("SpiecEasi Network: All domains, Negative Edges, Whole Water Column")
legend(
  "topright",
  bty = "n",
  legend = c("Archaea", "Bacteria", "Eukarya", "No Blast Hit"),
  fill = c("red", "green", "blue", "yellow"),
  border = NA
)
dev.off()
quartz_off_screen
2
Remove eukaryotes to see impact on network
# Extract the refit adjacency matrix from the two-domain SpiecEasi fit `se.2domains`
# (this overwrites the adj.mat taken from the three-domain fit)
adj.mat <- getRefit(se.2domains)
# Tabulate its entries (printed below as counts of 0s and 1s)
table(as.numeric(adj.mat))
0 1
28568 2408
# Weighted adjacency: correlations from the optimal covariance, masked by the refit network
se.cor <- cov2cor(as.matrix(getOptCov(se.2domains)))
weighted.adj.mat <- se.cor * getRefit(se.2domains)
# Graph objects from the unweighted and the weighted matrices
grph.unweighted <- adj2igraph(adj.mat)
grph.2domains <- adj2igraph(weighted.adj.mat)
# Label vertices with the row names of the two-domain table
V(grph.2domains)$name <- rownames(twodomains_df)
# V(grph.2domains)
# Node size follows degree; the +1 avoids size-zero vertices, the /2 scales nodes down
V(grph.2domains)$size <- (degree(grph.2domains) + 1) / 2
# Color edges by connection (positive or negative)
# E(grph.2domains)$color <- custombluegreen
# E(grph.2domains)$color[E(grph.2domains)$weight<0] <- customreddishpurple
# Edge width proportional to the absolute edge weight
E(grph.2domains)$width <- abs(E(grph.2domains)$weight) * 10
# Remove low-weight edges (you decide what threshold is right for your network):
# weight_threshold <- 0.07
# grph.2domains <- delete.edges(grph.2domains,which(abs(E(grph.2domains)$weight)<weight_threshold))

# Attach taxonomy to every node.
# Split the graph into its vertex and edge data frames
grph.2domains_df <- igraph::as_data_frame(grph.2domains, "both")
# Taxonomy table per domain, with the taxon ID in a `name` column for joining
ps_bac_pruned_2domains_tax_table <- as.data.frame(tax_table(ps_bac_pruned_2domains)) %>%
  mutate(name = rownames(tax_table(ps_bac_pruned_2domains)))
ps_arch_2domains_pruned_tax_table <- as.data.frame(tax_table(ps_arch_pruned_2domains)) %>%
  mutate(name = rownames(tax_table(ps_arch_pruned_2domains)))
# Vertices are stacked as bacteria then archaea; address each domain by position
n_bac_2domains <- ntaxa(ps_bac_pruned_2domains)
n_arch_2domains <- ntaxa(ps_arch_pruned_2domains)
bac_temp <- left_join(
  grph.2domains_df$vertices[seq_len(n_bac_2domains), ],
  ps_bac_pruned_2domains_tax_table,
  by = "name"
)
# drop the bacterial columns that the archaeal taxonomy table does not have
bac_temp <- select(bac_temp, -"taxonomy-9", -"Refined taxonomy")
arch_temp <- left_join(
  grph.2domains_df$vertices[n_bac_2domains + seq_len(n_arch_2domains), ],
  ps_arch_2domains_pruned_tax_table,
  by = "name"
)
# Full vertex table for both domains
all_temp <- rbind(bac_temp, arch_temp)
# Rebuild the graph so vertices carry the taxonomy attributes
grph.2domains <- graph_from_data_frame(grph.2domains_df$edges, directed = FALSE, vertices = all_temp)
# Colour palette for domain (this replaces the four-colour dtype defined earlier);
# indexed by the factor level position of "taxonomy-1"
dtype <- c("red", "green", "yellow")
domain_color_2domains <- dtype[as.numeric(as.factor(V(grph.2domains)$"taxonomy-1"))]
# Draw the bacteria + archaea network with title and legend on the current device.
# A new graphopt layout is computed on every call.
draw_2domains_network <- function() {
  plot(
    grph.2domains,
    vertex.label = NA,
    layout = layout_with_graphopt(grph.2domains),
    vertex.color = domain_color_2domains
  )
  title("SpiecEasi Network: Bacteria and Archaea, Whole Water Column")
  legend(
    "topright",
    bty = "n",
    legend = c("Archaea", "Bacteria", "No blast hit"),
    fill = c("red", "green", "yellow"),
    border = NA
  )
}
# Plot on screen
draw_2domains_network()
# Save the same plot as EPS
setEPS()
postscript(file = "Figures/2domains_alldepths_spieceasi_network.eps", width = 5.5, height = 5)
draw_2domains_network()
dev.off()
quartz_off_screen
2
# Split the bacteria + archaea network into positive-edge and negative-edge subgraphs
# and plot both.
grph.2domains.pos <- delete.edges(grph.2domains, which(E(grph.2domains)$weight < 0))
grph.2domains.neg <- delete.edges(grph.2domains, which(E(grph.2domains)$weight > 0))
# For each subsetted graph, remove those nodes that are no longer connected to anything
grph.2domains.pos <- delete.vertices(grph.2domains.pos, which(degree(grph.2domains.pos) == 0))
grph.2domains.neg <- delete.vertices(grph.2domains.neg, which(degree(grph.2domains.neg) == 0))
# Colour each vertex by the value of "taxonomy-1", looked up by name so that a domain
# missing from a subgraph cannot shift the colours of the others.
domain_palette_2domains <- c(
  "Archaea" = "red",
  "Bacteria" = "green",
  "No blast hit" = "yellow"
)
domain_color_2domains_pos <- unname(domain_palette_2domains[as.character(V(grph.2domains.pos)$"taxonomy-1")])
domain_color_2domains_neg <- unname(domain_palette_2domains[as.character(V(grph.2domains.neg)$"taxonomy-1")])
# Plot pos
# The legend fill follows the palette (Archaea red, Bacteria green); it previously
# listed green for Archaea and red for Bacteria, the reverse of the vertex colours.
plot(grph.2domains.pos,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.2domains.pos),
  vertex.color = domain_color_2domains_pos
)
title("SpiecEasi Network: Bacteria and Archaea, Positive Edges Only, Whole Water Column")
legend("topright",
  bty = "n",
  legend = c("Archaea", "Bacteria", "No Blast Hit"),
  fill = c("red", "green", "yellow"), border = NA
)
# Plot neg
plot(grph.2domains.neg,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.2domains.neg),
  vertex.color = domain_color_2domains_neg
)
title("SpiecEasi Network: Bacteria and Archaea, Negative Edges Only, Whole Water Column")
legend("topright",
  bty = "n",
  legend = c("Archaea", "Bacteria", "No Blast Hit"),
  fill = c("red", "green", "yellow"), border = NA
)
# Save plots
# Positive-edge bacteria + archaea network as EPS. Legend fill corrected to match the
# vertex colours (Archaea red, Bacteria green); it previously had the two swapped.
setEPS()
postscript(file = "Figures/2domains_alldepths_posedges_spieceasi_network.eps", width = 5.5, height = 5)
plot(grph.2domains.pos,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.2domains.pos),
  vertex.color = domain_color_2domains_pos
)
title("SpiecEasi Network: Bacteria and Archaea, Positive Edges Only, Whole Water Column")
legend("topright",
  bty = "n",
  legend = c("Archaea", "Bacteria", "No Blast Hit"),
  fill = c("red", "green", "yellow"), border = NA
)
dev.off()
quartz_off_screen
2
# Negative-edge bacteria + archaea network as EPS. Legend fill corrected to match the
# vertex colours (Archaea red, Bacteria green); it previously had the two swapped.
setEPS()
postscript(file = "Figures/2domains_alldepths_negedges_spieceasi_network.eps", width = 5.5, height = 5)
plot(grph.2domains.neg,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.2domains.neg),
  vertex.color = domain_color_2domains_neg
)
title("SpiecEasi Network: Bacteria and Archaea, Negative Edges Only, Whole Water Column")
legend("topright",
  bty = "n",
  legend = c("Archaea", "Bacteria", "No Blast Hit"),
  fill = c("red", "green", "yellow"), border = NA
)
dev.off()
quartz_off_screen
2
# ---- Oxycline network: build the igraph object and attach taxonomy ----

# Extract the refit adjacency matrix from the SpiecEasi output and tabulate
# its entries
adj.mat <- getRefit(se.oxycline)
table(as.numeric(adj.mat))

# Weighted adjacency: correlation matrix derived from the optimal covariance,
# multiplied element-wise by the refit matrix
se.cor <- cov2cor(as.matrix(getOptCov(se.oxycline)))
weighted.adj.mat <- se.cor * getRefit(se.oxycline)

# Convert to graph objects
grph.unweighted.oxycline <- adj2igraph(adj.mat)
grph.oxycline <- adj2igraph(weighted.adj.mat)

# Put back in species names
V(grph.oxycline)$name <- rownames(alldomains_df_oxycline)
# V(grph.oxycline)

# Node size proportional to degree; the +1 avoids size-zero vertices and the
# division by 2 scales the nodes down for plotting
V(grph.oxycline)$size <- (degree(grph.oxycline) + 1) / 2

# Color edges by connection (positive or negative)
# E(grph.oxycline)$color <- custombluegreen
# E(grph.oxycline)$color[E(grph.oxycline)$weight<0] <- customreddishpurple

# Edge width proportional to the absolute edge weight
E(grph.oxycline)$width <- abs(E(grph.oxycline)$weight) * 10

# Remove low-weight edges (you decide what threshold is right for your network):
# weight_threshold <- 0.07
# grph.oxycline <- delete.edges(grph.oxycline,which(abs(E(grph.oxycline)$weight)<weight_threshold))

# Join taxonomy data to each node
# Convert graph to data frames (vertices and edges)
grph.oxycline_df <- igraph::as_data_frame(grph.oxycline, "both")

# Formatted taxonomy table for each domain, with the taxon ID in a `name`
# column so that it can be joined to the vertex table
ps_bac_oxycline_pruned_tax_table <- as.data.frame(tax_table(ps_bac_oxycline_pruned)) %>%
  mutate(name = rownames(tax_table(ps_bac_oxycline_pruned)))
ps_arch_oxycline_pruned_tax_table <- as.data.frame(tax_table(ps_arch_oxycline_pruned)) %>%
  mutate(name = rownames(tax_table(ps_arch_oxycline_pruned)))
ps_euk_oxycline_pruned_tax_table <- as.data.frame(tax_table(ps_euk_oxycline_pruned)) %>%
  mutate(name = rownames(tax_table(ps_euk_oxycline_pruned)))

# Row ranges of each domain inside the vertex table.
# Assumes the vertices are ordered bacteria, then archaea, then eukaryotes
# (the order used by the slicing below) -- TODO confirm against how
# alldomains_df_oxycline was assembled.
# seq_len() keeps a slice empty when a domain has zero taxa, where 1:0 would
# select rows 1 and 0.
n_bac_oxycline <- ntaxa(ps_bac_oxycline_pruned)
n_arch_oxycline <- ntaxa(ps_arch_oxycline_pruned)
n_euk_oxycline <- ntaxa(ps_euk_oxycline_pruned)
bac_rows_oxycline <- seq_len(n_bac_oxycline)
arch_rows_oxycline <- n_bac_oxycline + seq_len(n_arch_oxycline)
euk_rows_oxycline <- n_bac_oxycline + n_arch_oxycline + seq_len(n_euk_oxycline)

# Link the vertex table to the formatted taxonomy tables
bac_temp <- left_join(
  grph.oxycline_df$vertices[bac_rows_oxycline, ],
  ps_bac_oxycline_pruned_tax_table,
  by = "name"
)
# Delete columns that don't match the other tax tables
bac_temp <- select(bac_temp, -"taxonomy-9", -"Refined taxonomy")
arch_temp <- left_join(
  grph.oxycline_df$vertices[arch_rows_oxycline, ],
  ps_arch_oxycline_pruned_tax_table,
  by = "name"
)
euk_temp <- left_join(
  grph.oxycline_df$vertices[euk_rows_oxycline, ],
  ps_euk_oxycline_pruned_tax_table,
  by = "name"
)

# Rename the eukaryote rank columns to match the bacterial/archaeal tables
euk_temp <- euk_temp %>%
  rename(
    "taxonomy-1" = Kingdom, "taxonomy-2" = Supergroup,
    "taxonomy-3" = Division, "taxonomy-4" = Class,
    "taxonomy-5" = Order, "taxonomy-6" = Family,
    "taxonomy-7" = Genus, "taxonomy-8" = Species
  )

# Build the full vertex table with all 3 domains
all_temp <- rbind(bac_temp, arch_temp, euk_temp)

# Rebuild the graph so that the taxonomy columns become vertex attributes
grph.oxycline <- graph_from_data_frame(
  grph.oxycline_df$edges,
  directed = FALSE,
  vertices = all_temp
)
# Colour palette for the domains
dtype <- c("red", "green", "blue")

# One fill colour per vertex: the factor code of "taxonomy-1" indexes dtype
domain_color_oxycline <-
  dtype[as.numeric(as.factor(V(grph.oxycline)$`taxonomy-1`))]

# On-screen plot
plot(
  grph.oxycline,
  layout = layout_with_graphopt(grph.oxycline),
  vertex.color = domain_color_oxycline,
  vertex.label = NA
)
title(main = "SpiecEasi Network: All domains, Oxycline")
legend(
  x = "topright",
  legend = c("Archaea", "Bacteria", "Eukarya"),
  fill = c("red", "green", "blue"),
  border = NA,
  bty = "n"
)

# Same figure written to an EPS file (the layout is recomputed for the file)
setEPS()
postscript(
  file = "Figures/3domains_oxycline_spieceasi_network.eps",
  width = 5.5,
  height = 5
)
plot(
  grph.oxycline,
  layout = layout_with_graphopt(grph.oxycline),
  vertex.color = domain_color_oxycline,
  vertex.label = NA
)
title(main = "SpiecEasi Network: All domains, Oxycline")
legend(
  x = "topright",
  legend = c("Archaea", "Bacteria", "Eukarya"),
  fill = c("red", "green", "blue"),
  border = NA,
  bty = "n"
)
dev.off()
# Split the oxycline network into a positive-edge and a negative-edge graph
grph.oxycline.pos <- delete.edges(grph.oxycline, which(E(grph.oxycline)$weight < 0))
grph.oxycline.neg <- delete.edges(grph.oxycline, which(E(grph.oxycline)$weight > 0))

# For each subsetted graph, remove the nodes that are no longer connected to
# anything
grph.oxycline.pos <- delete.vertices(grph.oxycline.pos, which(degree(grph.oxycline.pos) == 0))
grph.oxycline.neg <- delete.vertices(grph.oxycline.neg, which(degree(grph.oxycline.neg) == 0))

# Colour vector for each subgraph.
# The factor levels are pinned to those of the full oxycline graph. If a domain
# is absent from a subgraph, re-deriving the levels from the subgraph alone
# would shift the remaining domains onto the wrong palette entries and the
# fixed legend below would no longer match the node colours.
domain_levels_oxycline <- levels(as.factor(V(grph.oxycline)$`taxonomy-1`))
domain_color_oxycline_pos <- dtype[as.numeric(factor(
  V(grph.oxycline.pos)$`taxonomy-1`,
  levels = domain_levels_oxycline
))]
domain_color_oxycline_neg <- dtype[as.numeric(factor(
  V(grph.oxycline.neg)$`taxonomy-1`,
  levels = domain_levels_oxycline
))]

# Plot pos
plot(
  grph.oxycline.pos,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.oxycline.pos),
  vertex.color = domain_color_oxycline_pos
)
title("SpiecEasi Network: All domains, Positive Edges only, Oxycline")
legend(
  "topright",
  bty = "n",
  legend = c("Archaea", "Bacteria", "Eukarya"),
  fill = c("red", "green", "blue"),
  border = NA
)

# Plot neg
plot(
  grph.oxycline.neg,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.oxycline.neg),
  vertex.color = domain_color_oxycline_neg
)
title("SpiecEasi Network: All domains, Negative Edges Only, Oxycline")
legend(
  "topright",
  bty = "n",
  legend = c("Archaea", "Bacteria", "Eukarya"),
  fill = c("red", "green", "blue"),
  border = NA
)
# Write the oxycline positive-edge network to an EPS file
setEPS()
postscript(
  file = "Figures/3domains_oxycline_posedges_spieceasi_network.eps",
  width = 5.5,
  height = 5
)
plot(
  grph.oxycline.pos,
  layout = layout_with_graphopt(grph.oxycline.pos),
  vertex.color = domain_color_oxycline_pos,
  vertex.label = NA
)
title(main = "SpiecEasi Network: All domains, Positive Edges Only, Oxycline")
legend(
  x = "topright",
  legend = c("Archaea", "Bacteria", "Eukarya"),
  fill = c("red", "green", "blue"),
  border = NA,
  bty = "n"
)
dev.off()

# Write the oxycline negative-edge network to an EPS file
setEPS()
postscript(
  file = "Figures/3domains_oxycline_negedges_spieceasi_network.eps",
  width = 5.5,
  height = 5
)
plot(
  grph.oxycline.neg,
  layout = layout_with_graphopt(grph.oxycline.neg),
  vertex.color = domain_color_oxycline_neg,
  vertex.label = NA
)
title(main = "SpiecEasi Network: All domains, Negative Edges Only, Oxycline")
legend(
  x = "topright",
  legend = c("Archaea", "Bacteria", "Eukarya"),
  fill = c("red", "green", "blue"),
  border = NA,
  bty = "n"
)
dev.off()
# ---- Anoxic network: build the igraph object and attach taxonomy ----

# Extract the refit adjacency matrix from the SpiecEasi output and tabulate
# its entries
adj.mat <- getRefit(se.anoxic)
table(as.numeric(adj.mat))

# Weighted adjacency: correlation matrix derived from the optimal covariance,
# multiplied element-wise by the refit matrix
se.cor <- cov2cor(as.matrix(getOptCov(se.anoxic)))
weighted.adj.mat <- se.cor * getRefit(se.anoxic)

# Convert to graph objects
grph.unweighted.anoxic <- adj2igraph(adj.mat)
grph.anoxic <- adj2igraph(weighted.adj.mat)

# Put back in species names
V(grph.anoxic)$name <- rownames(alldomains_df_anoxic)
# V(grph.anoxic)

# Node size proportional to degree; the +1 avoids size-zero vertices and the
# division by 2 scales the nodes down for plotting
V(grph.anoxic)$size <- (degree(grph.anoxic) + 1) / 2

# Color edges by connection (positive or negative)
# E(grph.anoxic)$color <- custombluegreen
# E(grph.anoxic)$color[E(grph.anoxic)$weight<0] <- customreddishpurple

# Edge width proportional to the absolute edge weight
E(grph.anoxic)$width <- abs(E(grph.anoxic)$weight) * 10

# Remove low-weight edges (you decide what threshold is right for your network):
# weight_threshold <- 0.07
# grph.anoxic <- delete.edges(grph.anoxic,which(abs(E(grph.anoxic)$weight)<weight_threshold))

# Join taxonomy data to each node
# Convert graph to data frames (vertices and edges)
grph.anoxic_df <- igraph::as_data_frame(grph.anoxic, "both")

# Formatted taxonomy table for each domain, with the taxon ID in a `name`
# column so that it can be joined to the vertex table
ps_bac_anoxic_pruned_tax_table <- as.data.frame(tax_table(ps_bac_anoxic_pruned)) %>%
  mutate(name = rownames(tax_table(ps_bac_anoxic_pruned)))
ps_arch_anoxic_pruned_tax_table <- as.data.frame(tax_table(ps_arch_anoxic_pruned)) %>%
  mutate(name = rownames(tax_table(ps_arch_anoxic_pruned)))
ps_euk_anoxic_pruned_tax_table <- as.data.frame(tax_table(ps_euk_anoxic_pruned)) %>%
  mutate(name = rownames(tax_table(ps_euk_anoxic_pruned)))

# Row ranges of each domain inside the vertex table.
# Assumes the vertices are ordered bacteria, then archaea, then eukaryotes
# (the order used by the slicing below) -- TODO confirm against how
# alldomains_df_anoxic was assembled.
# seq_len() keeps a slice empty when a domain has zero taxa, where 1:0 would
# select rows 1 and 0.
n_bac_anoxic <- ntaxa(ps_bac_anoxic_pruned)
n_arch_anoxic <- ntaxa(ps_arch_anoxic_pruned)
n_euk_anoxic <- ntaxa(ps_euk_anoxic_pruned)
bac_rows_anoxic <- seq_len(n_bac_anoxic)
arch_rows_anoxic <- n_bac_anoxic + seq_len(n_arch_anoxic)
euk_rows_anoxic <- n_bac_anoxic + n_arch_anoxic + seq_len(n_euk_anoxic)

# Link the vertex table to the formatted taxonomy tables
bac_temp <- left_join(
  grph.anoxic_df$vertices[bac_rows_anoxic, ],
  ps_bac_anoxic_pruned_tax_table,
  by = "name"
)
# Delete columns that don't match the other tax tables
bac_temp <- select(bac_temp, -"taxonomy-9", -"Refined taxonomy")
arch_temp <- left_join(
  grph.anoxic_df$vertices[arch_rows_anoxic, ],
  ps_arch_anoxic_pruned_tax_table,
  by = "name"
)
euk_temp <- left_join(
  grph.anoxic_df$vertices[euk_rows_anoxic, ],
  ps_euk_anoxic_pruned_tax_table,
  by = "name"
)

# Rename the eukaryote rank columns to match the bacterial/archaeal tables
euk_temp <- euk_temp %>%
  rename(
    "taxonomy-1" = Kingdom, "taxonomy-2" = Supergroup,
    "taxonomy-3" = Division, "taxonomy-4" = Class,
    "taxonomy-5" = Order, "taxonomy-6" = Family,
    "taxonomy-7" = Genus, "taxonomy-8" = Species
  )

# Build the full vertex table with all 3 domains
all_temp <- rbind(bac_temp, arch_temp, euk_temp)

# Rebuild the graph so that the taxonomy columns become vertex attributes
grph.anoxic <- graph_from_data_frame(
  grph.anoxic_df$edges,
  directed = FALSE,
  vertices = all_temp
)
# Colour palette for the domains
dtype <- c("red", "green", "blue")

# One fill colour per vertex: the factor code of "taxonomy-1" indexes dtype
domain_color_anoxic <-
  dtype[as.numeric(as.factor(V(grph.anoxic)$`taxonomy-1`))]

# On-screen plot
plot(
  grph.anoxic,
  layout = layout_with_graphopt(grph.anoxic),
  vertex.color = domain_color_anoxic,
  vertex.label = NA
)
title(main = "SpiecEasi Network: All domains, Anoxic Layer")
legend(
  x = "topright",
  legend = c("Archaea", "Bacteria", "Eukarya"),
  fill = c("red", "green", "blue"),
  border = NA,
  bty = "n"
)

# Same network written to an EPS file (the layout is recomputed for the file)
setEPS()
postscript(
  file = "Figures/3domains_anoxic_spieceasi_network.eps",
  width = 5.5,
  height = 5
)
plot(
  grph.anoxic,
  layout = layout_with_graphopt(grph.anoxic),
  vertex.color = domain_color_anoxic,
  vertex.label = NA
)
title(main = "SpiecEasi Network: All domains, Anoxic")
legend(
  x = "topright",
  legend = c("Archaea", "Bacteria", "Eukarya"),
  fill = c("red", "green", "blue"),
  border = NA,
  bty = "n"
)
dev.off()
# Split the anoxic network into a positive-edge and a negative-edge graph
grph.anoxic.pos <- delete.edges(grph.anoxic, which(E(grph.anoxic)$weight < 0))
grph.anoxic.neg <- delete.edges(grph.anoxic, which(E(grph.anoxic)$weight > 0))

# For each subsetted graph, remove the nodes that are no longer connected to
# anything
grph.anoxic.pos <- delete.vertices(grph.anoxic.pos, which(degree(grph.anoxic.pos) == 0))
grph.anoxic.neg <- delete.vertices(grph.anoxic.neg, which(degree(grph.anoxic.neg) == 0))

# Colour vector for each subgraph.
# The factor levels are pinned to those of the full anoxic graph. If a domain
# is absent from a subgraph, re-deriving the levels from the subgraph alone
# would shift the remaining domains onto the wrong palette entries and the
# fixed legend below would no longer match the node colours.
domain_levels_anoxic <- levels(as.factor(V(grph.anoxic)$`taxonomy-1`))
domain_color_anoxic_pos <- dtype[as.numeric(factor(
  V(grph.anoxic.pos)$`taxonomy-1`,
  levels = domain_levels_anoxic
))]
domain_color_anoxic_neg <- dtype[as.numeric(factor(
  V(grph.anoxic.neg)$`taxonomy-1`,
  levels = domain_levels_anoxic
))]

# Plot pos
plot(
  grph.anoxic.pos,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.anoxic.pos),
  vertex.color = domain_color_anoxic_pos
)
title("SpiecEasi Network: All domains, Positive Edges only, anoxic")
legend(
  "topright",
  bty = "n",
  legend = c("Archaea", "Bacteria", "Eukarya"),
  fill = c("red", "green", "blue"),
  border = NA
)

# Plot neg
plot(
  grph.anoxic.neg,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.anoxic.neg),
  vertex.color = domain_color_anoxic_neg
)
title("SpiecEasi Network: All domains, Negative Edges Only, anoxic")
legend(
  "topright",
  bty = "n",
  legend = c("Archaea", "Bacteria", "Eukarya"),
  fill = c("red", "green", "blue"),
  border = NA
)
# Write the anoxic positive-edge network to an EPS file
setEPS()
postscript(
  file = "Figures/3domains_anoxic_posedges_spieceasi_network.eps",
  width = 5.5,
  height = 5
)
plot(
  grph.anoxic.pos,
  layout = layout_with_graphopt(grph.anoxic.pos),
  vertex.color = domain_color_anoxic_pos,
  vertex.label = NA
)
title(main = "SpiecEasi Network: All domains, Positive Edges Only, Shallow Anoxic")
legend(
  x = "topright",
  legend = c("Archaea", "Bacteria", "Eukarya"),
  fill = c("red", "green", "blue"),
  border = NA,
  bty = "n"
)
dev.off()

# Write the anoxic negative-edge network to an EPS file
setEPS()
postscript(
  file = "Figures/3domains_anoxic_negedges_spieceasi_network.eps",
  width = 5.5,
  height = 5
)
plot(
  grph.anoxic.neg,
  layout = layout_with_graphopt(grph.anoxic.neg),
  vertex.color = domain_color_anoxic_neg,
  vertex.label = NA
)
title(main = "SpiecEasi Network: All domains, Negative Edges Only, Shallow Anoxic")
legend(
  x = "topright",
  legend = c("Archaea", "Bacteria", "Eukarya"),
  fill = c("red", "green", "blue"),
  border = NA,
  bty = "n"
)
dev.off()
# Eight-panel overview drawn on the current device (4 rows x 2 columns):
# left column = positive-edge networks, right column = negative-edge networks;
# rows = all depths, oxycline, anoxic, prokaryotes only.
op <- par(
  oma = c(2, .5, .5, 0), # Room for the titles and legend
  mfrow = c(4, 2),
  mai = c(.15, .3, .15, .1)
)

# Panel 1- All depths, positive network
plot(
  grph.pos,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.pos),
  vertex.color = domain_color_pos
)
mtext("Positive", side = 3, outer = TRUE, line = -1, adj = 0.22, cex = .8)
mtext("All depths", side = 2, cex = .8, line = 1.5)

# Panel 2- All depths, negative network
plot(
  grph.neg,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.neg),
  vertex.color = domain_color_neg
)
mtext("Negative", side = 3, outer = TRUE, line = -1, adj = .8, cex = .8)

# Panel 3- Oxycline, positive network
plot(
  grph.oxycline.pos,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.oxycline.pos),
  vertex.color = domain_color_oxycline_pos
)
mtext("Oxycline", side = 2, cex = .8, line = 1.5)

# Panel 4- Oxycline, negative network
plot(
  grph.oxycline.neg,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.oxycline.neg),
  vertex.color = domain_color_oxycline_neg
)

# Panel 5- Anoxic, positive network
plot(
  grph.anoxic.pos,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.anoxic.pos),
  vertex.color = domain_color_anoxic_pos
)
mtext("Anoxic", side = 2, cex = .8, line = 1.5)

# Panel 6- Anoxic, negative network
plot(
  grph.anoxic.neg,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.anoxic.neg),
  vertex.color = domain_color_anoxic_neg
)

# Panel 7- 2 Domains, positive network
plot(
  grph.2domains.pos,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.2domains.pos),
  vertex.color = domain_color_2domains_pos
)
mtext("Prok Only", side = 2, cex = .8, line = 1.5)

# Panel 8- 2 Domains, negative network.
# The layout has to be computed from the graph being drawn: a layout built from
# the positive graph has one row per vertex of *that* graph and does not line
# up with the vertices of the negative graph.
plot(
  grph.2domains.neg,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.2domains.neg),
  vertex.color = domain_color_2domains_neg
)

# Add a shared legend below the panels
par(op) # Leave the last plot
op <- par(
  usr = c(0, 1, 0, 1), # Reset the coordinates
  xpd = NA # Allow plotting outside the plot region
)
# NOTE(review): the stand-alone two-domain plots label Archaea green and
# Bacteria red, whereas this legend uses red / green -- confirm which one
# matches domain_color_2domains_pos / _neg.
legend(
  .15, -0.04,
  c("Archaea", "Bacteria", "Eukarya", "No Blast Hit"),
  col = c("red", "green", "blue", "yellow"),
  pch = 16,
  box.col = NA,
  cex = .8,
  horiz = TRUE,
  x.intersp = 0.3
)
# Save figure
# Eight-panel overview written to Figures/Networks_pos_neg.eps (4 rows x 2
# columns): left column = positive-edge networks, right column = negative-edge
# networks; rows = all depths, oxycline, anoxic, prokaryotes only.
setEPS(width = 6, height = 9)
postscript("Figures/Networks_pos_neg.eps")
op <- par(
  oma = c(2, .5, .5, 0), # Room for the titles and legend
  mfrow = c(4, 2),
  mai = c(.15, .3, .15, .1)
)

# Panel 1- All depths, positive network
plot(
  grph.pos,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.pos),
  vertex.color = domain_color_pos
)
mtext("Positive", side = 3, outer = TRUE, line = -1, adj = 0.22, cex = .8)
mtext("All depths", side = 2, cex = .8, line = 1.5)

# Panel 2- All depths, negative network
plot(
  grph.neg,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.neg),
  vertex.color = domain_color_neg
)
mtext("Negative", side = 3, outer = TRUE, line = -1, adj = .8, cex = .8)

# Panel 3- Oxycline, positive network
plot(
  grph.oxycline.pos,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.oxycline.pos),
  vertex.color = domain_color_oxycline_pos
)
mtext("Oxycline", side = 2, cex = .8, line = 1.5)

# Panel 4- Oxycline, negative network
plot(
  grph.oxycline.neg,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.oxycline.neg),
  vertex.color = domain_color_oxycline_neg
)

# Panel 5- Anoxic, positive network
plot(
  grph.anoxic.pos,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.anoxic.pos),
  vertex.color = domain_color_anoxic_pos
)
mtext("Anoxic", side = 2, cex = .8, line = 1.5)

# Panel 6- Anoxic, negative network
plot(
  grph.anoxic.neg,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.anoxic.neg),
  vertex.color = domain_color_anoxic_neg
)

# Panel 7- 2 Domains, positive network
plot(
  grph.2domains.pos,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.2domains.pos),
  vertex.color = domain_color_2domains_pos
)
mtext("Prok Only", side = 2, cex = .8, line = 1.5)

# Panel 8- 2 Domains, negative network.
# The layout has to be computed from the graph being drawn: a layout built from
# the positive graph has one row per vertex of *that* graph and does not line
# up with the vertices of the negative graph.
plot(
  grph.2domains.neg,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.2domains.neg),
  vertex.color = domain_color_2domains_neg
)

# Add a shared legend below the panels
par(op) # Leave the last plot
op <- par(
  usr = c(0, 1, 0, 1), # Reset the coordinates
  xpd = NA # Allow plotting outside the plot region
)
legend(
  .15, -0.04,
  c("Archaea", "Bacteria", "Eukarya", "No Blast Hit"),
  col = c("red", "green", "blue", "yellow"),
  pch = 16,
  box.col = NA,
  cex = .8,
  horiz = TRUE,
  x.intersp = 0.3
)
dev.off()
Number of edges in each network, and how many of them are positive vs. negative
# For each network: total number of edges, then the percentage of edges with a
# negative weight

# Full dataset network
edge_weights_full <- E(grph)$weight
length(edge_weights_full)
(sum(edge_weights_full < 0) / length(edge_weights_full)) * 100

# 2-domain dataset network
edge_weights_2domains <- E(grph.2domains)$weight
length(edge_weights_2domains)
(sum(edge_weights_2domains < 0) / length(edge_weights_2domains)) * 100

# Oxycline network
edge_weights_oxycline <- E(grph.oxycline)$weight
length(edge_weights_oxycline)
(sum(edge_weights_oxycline < 0) / length(edge_weights_oxycline)) * 100

# Anoxic network
edge_weights_anoxic <- E(grph.anoxic)$weight
length(edge_weights_anoxic)
(sum(edge_weights_anoxic < 0) / length(edge_weights_anoxic)) * 100
Declining number of total edges going from full dataset –> oxycline only –> anoxic only. But the percentage of negative associations is similar (34.4-37.7%). Most associations (~65%) in each network are positive.
Edge density: the number of edges relative to the total number of possible edges (as a percentage)
# Edge density of each network, expressed as a percentage
100 * edge_density(graph = grph)
100 * edge_density(graph = grph.2domains)
100 * edge_density(graph = grph.oxycline)
100 * edge_density(graph = grph.anoxic)
The full dataset has the highest edge density, then oxycline, then anoxic
The size of the components, or “clumps,” in the network, and how many members in each
# Number of connected components ($no) and their sizes ($csize) per network.
# components() is evaluated once per graph and both fields are printed.

# Full dataset
comp_full <- components(grph)
comp_full$no
comp_full$csize

# 2 domains
comp_2domains <- components(grph.2domains)
comp_2domains$no
comp_2domains$csize

# Oxycline
comp_oxycline <- components(grph.oxycline)
comp_oxycline$no
comp_oxycline$csize

# Anoxic
comp_anoxic <- components(grph.anoxic)
comp_anoxic$no
comp_anoxic$csize
The anoxic network is most disjointed, with 48 clumps and the largest containing only 24 members. The next is oxycline, with 32 clumps and the largest with 144 members. Then the full dataset has only 27 clumps and the largest clump contains 262 members.
A path is the shortest distance between two nodes (the fewest number of edges). The average path length of a network gives a sense of how closely connected every node is to every other node. Nodes in unconnected components of the network have “infinite” path lengths to each other. The function mean_distance ignores these infinite paths and calculates the average over all remaining (finite) paths.
# Average path length of each network, in the order: full dataset, 2 domains,
# oxycline, anoxic
mean_distance(graph = grph)
mean_distance(graph = grph.2domains)
mean_distance(graph = grph.oxycline)
mean_distance(graph = grph.anoxic)
The longest average path length is in the oxycline, followed by the whole dataset and then anoxic. Meaning the nodes in the anoxic are more closely associated with each other. Even though there are more hubs in anoxic, as shown above, the nodes in the hubs are close to each other. The oxycline hubs have the longest average distances between nodes.
# For every positive/negative network: convert the graph to data frames, keep
# the vertex table, and tabulate the vertices by their "taxonomy-5" value.
# The counts noted under each table were recorded by the original author.

# ---- Full dataset, positive network ----
grph.pos_df <- igraph::as_data_frame(grph.pos, what = "both")
grph.pos_df_vert <- grph.pos_df[["vertices"]]
# How many Syndiniales and Polycystinea?
as.data.frame(table(grph.pos_df_vert[["taxonomy-5"]]))
# 37 Dino-Group-II
# 3 Dino-Group-I
# 34 Spumellarida

# ---- Full dataset, negative network ----
grph.neg_df <- igraph::as_data_frame(grph.neg, what = "both")
grph.neg_df_vert <- grph.neg_df[["vertices"]]
# How many Syndiniales and Polycystinea?
as.data.frame(table(grph.neg_df_vert[["taxonomy-5"]]))
# 18 Dino-Group-II
# 4 Dino-Group-I
# 17 Spumellarida

# ---- Oxycline, positive network ----
grph.oxycline.pos_df <- igraph::as_data_frame(grph.oxycline.pos, what = "both")
grph.oxycline.pos_df_vert <- grph.oxycline.pos_df[["vertices"]]
# How many Syndiniales and Polycystinea?
as.data.frame(table(grph.oxycline.pos_df_vert[["taxonomy-5"]]))
# 34 Dino-Group-II
# 1 Dino-Group-I
# 24 Spumellarida

# ---- Oxycline, negative network ----
grph.oxycline.neg_df <- igraph::as_data_frame(grph.oxycline.neg, what = "both")
grph.oxycline.neg_df_vert <- grph.oxycline.neg_df[["vertices"]]
# How many Syndiniales and Polycystinea?
as.data.frame(table(grph.oxycline.neg_df_vert[["taxonomy-5"]]))
# 24 Dino-Group-II
# 1 Dino-Group-I
# 9 Spumellarida

# ---- Anoxic, positive network ----
grph.anoxic.pos_df <- igraph::as_data_frame(grph.anoxic.pos, what = "both")
grph.anoxic.pos_df_vert <- grph.anoxic.pos_df[["vertices"]]
# How many Syndiniales and Polycystinea?
as.data.frame(table(grph.anoxic.pos_df_vert[["taxonomy-5"]]))
# 0 Dino-Group-II
# 1 Dino-Group-I
# 8 Spumellarida

# ---- Anoxic, negative network ----
grph.anoxic.neg_df <- igraph::as_data_frame(grph.anoxic.neg, what = "both")
grph.anoxic.neg_df_vert <- grph.anoxic.neg_df[["vertices"]]
# How many Syndiniales and Polycystinea?
as.data.frame(table(grph.anoxic.neg_df_vert[["taxonomy-5"]]))
# 0 Dino-Group-II
# 0 Dino-Group-I
# 0 Spumellarida
# For a focal group (Syndiniales, Spumellarida, Cariacotrichea) each section:
#   1. picks the focal vertices from the vertex table,
#   2. selects the edges attached to them and builds the edge-induced subgraph,
#   3. counts the remaining (non-focal) vertices of that subgraph by taxonomy.

# ---- Positive associations: Syndiniales ----
grph.pos_df_vert_synd <- grph.pos_df_vert %>%
  filter(`taxonomy-4` == "Syndiniales")
grph.pos_synd_edges <- E(grph.pos)[from(grph.pos_df_vert_synd$name)] # get edges
grph.pos.synd_subgraph <- subgraph.edges(grph.pos, grph.pos_synd_edges) # filter graph
grph.pos_df_vert %>%
  filter(
    name %in% V(grph.pos.synd_subgraph)$name,
    !(`taxonomy-4` %in% c("Syndiniales"))
  ) %>%
  count(`taxonomy-2`, `taxonomy-3`, `taxonomy-4`, `taxonomy-5`, name = "count", sort = TRUE)

# ---- Positive associations: Spumellarida ----
grph.pos_df_vert_spum <- grph.pos_df_vert %>%
  filter(`taxonomy-5` == "Spumellarida")
grph.pos_spum_edges <- E(grph.pos)[from(grph.pos_df_vert_spum$name)]
grph.pos.spum_subgraph <- subgraph.edges(grph.pos, grph.pos_spum_edges)
grph.pos_df_vert %>%
  filter(
    name %in% V(grph.pos.spum_subgraph)$name,
    !(`taxonomy-5` %in% c("Spumellarida"))
  ) %>%
  count(`taxonomy-2`, `taxonomy-3`, `taxonomy-4`, `taxonomy-5`, name = "count", sort = TRUE)

# ---- Negative associations: Syndiniales ----
grph.neg_df_vert_synd <- grph.neg_df_vert %>%
  filter(`taxonomy-4` == "Syndiniales")
grph.neg_synd_edges <- E(grph.neg)[from(grph.neg_df_vert_synd$name)]
grph.neg.synd_subgraph <- subgraph.edges(grph.neg, grph.neg_synd_edges)
grph.neg_df_vert %>%
  filter(
    name %in% V(grph.neg.synd_subgraph)$name,
    !(`taxonomy-4` %in% c("Syndiniales"))
  ) %>%
  count(`taxonomy-2`, `taxonomy-3`, `taxonomy-4`, `taxonomy-5`, name = "count", sort = TRUE)

# ---- Negative associations: Spumellarida ----
grph.neg_df_vert_spum <- grph.neg_df_vert %>%
  filter(`taxonomy-5` == "Spumellarida")
grph.neg_spum_edges <- E(grph.neg)[from(grph.neg_df_vert_spum$name)]
grph.neg.spum_subgraph <- subgraph.edges(grph.neg, grph.neg_spum_edges)
grph.neg_df_vert %>%
  filter(
    name %in% V(grph.neg.spum_subgraph)$name,
    !(`taxonomy-5` %in% c("Spumellarida"))
  ) %>%
  count(`taxonomy-2`, `taxonomy-3`, `taxonomy-4`, `taxonomy-5`, name = "count", sort = TRUE)

# ---- Cariacotrichea (positive associations only) ----
# Also check out Cariacotrichea to hypothesize about possible symbiosis partners
grph.pos_df_vert_cari <- grph.pos_df_vert %>%
  filter(`taxonomy-4` == "Cariacotrichea")
grph.pos_cari_edges <- E(grph.pos)[from(grph.pos_df_vert_cari$name)]
grph.pos.cari_subgraph <- subgraph.edges(grph.pos, grph.pos_cari_edges)
grph.pos_df_vert %>%
  filter(
    name %in% V(grph.pos.cari_subgraph)$name,
    !(`taxonomy-4` %in% c("Cariacotrichea"))
  ) %>%
  count(`taxonomy-2`, `taxonomy-3`, `taxonomy-4`, `taxonomy-5`, name = "count", sort = TRUE)
Calculate five parameters for each individual node (degree, strength, betweenness, closeness, and local clustering coefficient):
# ---- Node-level measures for the full-dataset network ----

# First change the weights of the edges (the strength of association) to their
# absolute value. This won't work if negative edge weights are left with the
# negative signs.
# NOTE: this overwrites the weights stored in `grph`, so the sign of each edge
# is no longer available from `grph` after this line.
E(grph)$weight <- abs(E(grph)$weight)

# Calculate parameters (one value per vertex)
names <- V(grph)$name
de <- degree(grph)
st <- graph.strength(grph)
be <- betweenness(grph, normalized = TRUE)
cc <- closeness(grph)
l.cluster <- transitivity(grph, "local")

# Assemble dataset
fulldateset_node_measures <- data.frame(
  ID = names,
  degree = de,
  strength = st,
  betweenness = be,
  closeness = cc,
  clustering_coefficient = l.cluster
)

# Row ranges of each domain inside the node table, taken from the number of
# rows of each domain's OTU table.
# Assumes the vertices are ordered bacteria, then archaea, then eukaryotes
# -- TODO confirm against how the full-dataset graph was assembled.
# seq_len() keeps a slice empty when a domain has no rows, where 1:0 would
# select rows 1 and 0.
n_bac_nodes <- nrow(otu_table(ps_bac_pruned_3domains))
n_arch_nodes <- nrow(otu_table(ps_arch_pruned_3domains))
n_euk_nodes <- nrow(otu_table(ps_euk_pruned_3domains))
bac_node_rows <- seq_len(n_bac_nodes)
arch_node_rows <- n_bac_nodes + seq_len(n_arch_nodes)
euk_node_rows <- n_bac_nodes + n_arch_nodes + seq_len(n_euk_nodes)

# Put back bacterial taxonomy
temp1 <- left_join(
  fulldateset_node_measures[bac_node_rows, ],
  bac_taxonomy,
  by = c("ID" = "#OTU ID")
)
# Delete the "taxonomy-9" and "Refined taxonomy" columns, which the other
# domains do not have
temp1 <- select(temp1, -"taxonomy-9", -"Refined taxonomy")

# Archaeal and eukaryotic taxonomy
temp2 <- left_join(
  fulldateset_node_measures[arch_node_rows, ],
  arch_taxonomy,
  by = c("ID" = "#OTU ID")
)
temp3 <- left_join(
  fulldateset_node_measures[euk_node_rows, ],
  euk_taxonomy,
  by = c("ID" = "#ASV ID")
)

# Rename col names to match those from Bac and Arch
temp3 <- temp3 %>%
  rename(
    "taxonomy-1" = Kingdom, "taxonomy-2" = Supergroup,
    "taxonomy-3" = Division, "taxonomy-4" = Class,
    "taxonomy-5" = Order, "taxonomy-6" = Family,
    "taxonomy-7" = Genus, "taxonomy-8" = Species
  )

# Combine all 3 domains back into one data frame
fulldateset_node_measures <- rbind(temp1, temp2, temp3)
fulldateset_node_measures
Plot betweenness vs. degree for each node.
- Tipton et al. argue that nodes with high betweenness are “bottlenecks” or important connectors, and that nodes with high degree are “hubs”.
- Berry et al. argue that nodes with low betweenness, high degree, high closeness, and high transitivity are candidate keystone species.
- Closeness is added to each node’s plotly label rather than plotted, since it does not vary much from node to node and would not make sense to plot.
# Clean the node table before plotting:
#  - NaN/NA clustering coefficients become 0. A blanket
#    replace(is.na(.), "unidentified") would write a string into this numeric
#    column and silently turn the whole column into character.
#  - missing values in the non-numeric (taxonomy / ID) columns become
#    "unidentified"
#  - nodes with 0 betweenness are dropped (log10 of 0 cannot be plotted)
fulldateset_node_measures <- fulldateset_node_measures %>%
  mutate(
    clustering_coefficient = replace(
      clustering_coefficient,
      is.na(clustering_coefficient),
      0
    ),
    across(
      !where(is.numeric),
      ~ replace_na(as.character(.x), "unidentified")
    )
  ) %>%
  filter(betweenness != 0)

# Get enough colors (one per distinct taxonomy-3 value) and randomly rearrange
# them so they are easier to separate on the plot
mycolors <- colorRampPalette(brewer.pal(12, "Paired"))(
  length(unique(fulldateset_node_measures$`taxonomy-3`))
)
set.seed(123)
mycolors <- sample(mycolors)

# Plot with plotly so that hovering over a point shows which taxon it is.
# The extra aesthetics (ID, taxonomy-2, -4, -5) are not drawn; they are only
# carried along for the tooltip.
# theme_bw() is a complete theme and must come BEFORE theme(): added afterwards
# it would reset the legend-title setting.
p <- ggplot(
  fulldateset_node_measures,
  aes(
    x = degree, y = betweenness, ID = ID,
    shape = `taxonomy-1`, `taxonomy-2` = `taxonomy-2`,
    color = `taxonomy-3`, `taxonomy-4` = `taxonomy-4`,
    `taxonomy-5` = `taxonomy-5`
  )
) +
  geom_point(size = 4) +
  scale_y_continuous(trans = "log10") +
  scale_color_manual(values = mycolors) +
  theme_bw() +
  theme(legend.title = element_blank())
p
ggplotly(p, tooltip = c("ID", "taxonomy-2", "taxonomy-3", "taxonomy-4", "taxonomy-5"))
Make static figure for manuscript
# Static betweenness-vs-degree figure for the manuscript.
# theme_bw() is a complete theme: it has to be added BEFORE the theme() tweaks,
# otherwise it replaces them and the text sizes, axis-label rotation and margin
# settings below are all lost.
p2 <- ggplot(
  fulldateset_node_measures,
  aes(x = degree, y = betweenness, shape = `taxonomy-1`, color = `taxonomy-3`)
) +
  geom_point(size = 4) +
  scale_y_continuous(trans = "log10") +
  scale_color_manual(values = mycolors, name = "") +
  scale_shape_manual(values = c(19, 17, 15, 18), name = "") +
  theme_bw() +
  theme(
    legend.title = element_blank(),
    axis.text = element_text(size = 8),
    axis.text.x = element_text(size = 8, angle = 45, hjust = 1),
    axis.title = element_text(size = 8),
    legend.text = element_text(size = 8),
    strip.text = element_text(size = 8),
    legend.margin = margin(0, 0, 0, 2),
    legend.box.margin = margin(-10, -10, -10, -10),
    plot.margin = grid::unit(c(0, 0, 0, 0), "mm")
  )
p2
# Saved under "Figures/" (capital F) like every other figure in this analysis;
# the lower-case spelling only resolves on case-insensitive file systems.
ggsave("Figures/betweenness_vs_degree.eps", p2, width = 10, height = 6, units = "in")
# ---- Total degree per taxonomic group (full-dataset network) ----

# One row per vertex of `grph`: its degree (`de`) and its name
de_df <- as.data.frame(de)
de_df$name <- rownames(de_df)

# Attach taxonomy from the vertex attributes of `grph` itself.
# `all_temp` must not be used here: it is reassigned in each per-regime section
# and, by this point, holds the vertex table of the anoxic network, so
# full-dataset nodes absent from that network would get NA taxonomy.
# Assumes `grph` carries the "taxonomy-*" vertex attributes, as the graphs
# derived from it do -- TODO confirm.
grph_vertex_tax <- igraph::as_data_frame(grph, what = "vertices")
de_df <- left_join(de_df, grph_vertex_tax, by = "name")

# Sum the degree of all nodes sharing a value of the given taxonomic rank
# column and sort the groups from highest to lowest total degree.
#   node_df: data frame with a `de` column and the rank column
#   rank:    name of the rank column, e.g. "taxonomy-1"
sum_degree_by_rank <- function(node_df, rank) {
  node_df %>%
    group_by(across(all_of(rank))) %>%
    summarise(degreesum = sum(de)) %>%
    arrange(desc(degreesum))
}

sum_degree_by_rank(de_df, "taxonomy-1")
sum_degree_by_rank(de_df, "taxonomy-2")
sum_degree_by_rank(de_df, "taxonomy-3")
sum_degree_by_rank(de_df, "taxonomy-4")
sum_degree_by_rank(de_df, "taxonomy-5")
# save.image("EnvironmentBackups/CariacoEuks_postanalysis_vars_upto_nodelevelmeasures.RData")
Or load if coming back
# Restore the saved workspace; objects in the file replace same-named objects
# in the current environment
load(file = "EnvironmentBackups/CariacoEuks_postanalysis_vars_upto_nodelevelmeasures.RData")
In response to reviews, re-run some analyses
Make all networks same size so that they are directly comparable-
Make subsets that will allow for controlling for network size later on - top 100 euks, bac, arch (which will be combined into a 300-member dataset) - top 150 bac and arch (which will be combined into a 300-member dataset of Proks only)
# For each domain: rank the taxa by taxa_sums() of the *_ra object (largest
# first), then keep the top 100 and the top 150 from the corresponding
# phyloseq object.

# Bacteria
bac_ranked <- names(sort(taxa_sums(ps_bac_ra), decreasing = TRUE))
# top 100 most abundant bacteria
bac_top100 <- bac_ranked[1:100]
ps_bac_top100 <- prune_taxa(bac_top100, ps_bac)
# top 150 most abundant bacteria
bac_top150 <- bac_ranked[1:150]
ps_bac_top150 <- prune_taxa(bac_top150, ps_bac)

# Archaea
arch_ranked <- names(sort(taxa_sums(ps_arch_ra), decreasing = TRUE))
# top 100 most abundant archaea
arch_top100 <- arch_ranked[1:100]
ps_arch_top100 <- prune_taxa(arch_top100, ps_arch)
# top 150 most abundant archaea
arch_top150 <- arch_ranked[1:150]
ps_arch_top150 <- prune_taxa(arch_top150, ps_arch)

# Eukaryotes (taken from ps_ra / ps)
euk_ranked <- names(sort(taxa_sums(ps_ra), decreasing = TRUE))
# top 100 most abundant eukaryotes
euk_top100 <- euk_ranked[1:100]
ps_euk_top100 <- prune_taxa(euk_top100, ps)
# top 150 most abundant eukaryotes
euk_top150 <- euk_ranked[1:150]
ps_euk_top150 <- prune_taxa(euk_top150, ps)
Change the sample names in the otu tables to sample “Type”
# For each domain: keep the sample-key rows whose sample ID appears among the
# OTU-table columns of the top-100 object, order them like those columns, and
# use the key's `Type` column as the new sample names. The same names are
# applied to the top-150 object.

# Archaea (samples missing from the archaeal data are dropped from the key)
arch_sample_ids <- colnames(otu_table(ps_arch_top100))
samplekey_A <- samplekey %>%
  filter(SampleID_arch %in% arch_sample_ids) %>%
  arrange(factor(SampleID_arch, levels = arch_sample_ids))
sample_names(ps_arch_top100) <- samplekey_A$Type
sample_names(ps_arch_top150) <- samplekey_A$Type

# Bacteria
bac_sample_ids <- colnames(otu_table(ps_bac_top100))
samplekey_B <- samplekey %>%
  filter(SampleID_bac %in% bac_sample_ids) %>%
  arrange(factor(SampleID_bac, levels = bac_sample_ids))
sample_names(ps_bac_top100) <- samplekey_B$Type
sample_names(ps_bac_top150) <- samplekey_B$Type

# Eukaryotes
euk_sample_ids <- colnames(otu_table(ps_euk_top100))
samplekey_E <- samplekey %>%
  filter(SampleID_euk %in% euk_sample_ids) %>%
  arrange(factor(SampleID_euk, levels = euk_sample_ids))
sample_names(ps_euk_top100) <- samplekey_E$Type
sample_names(ps_euk_top150) <- samplekey_E$Type
Similar to above, pull out top 100 from each domain/ redox regime in order to make networks of the same size
# Oxycline: keep the 100 taxa with the largest taxa_sums() in each domain

# Bacteria
bac_top100_oxycline <-
  names(sort(taxa_sums(ps_bac_ra_oxycline), decreasing = TRUE))[1:100]
ps_bac_top100_oxycline <- prune_taxa(bac_top100_oxycline, ps_bac_oxycline)

# Archaea
arch_top100_oxycline <-
  names(sort(taxa_sums(ps_arch_ra_oxycline), decreasing = TRUE))[1:100]
ps_arch_top100_oxycline <- prune_taxa(arch_top100_oxycline, ps_arch_oxycline)

# Eukarya
euk_top100_oxycline <-
  names(sort(taxa_sums(ps_euk_ra_oxycline), decreasing = TRUE))[1:100]
ps_euk_top100_oxycline <- prune_taxa(euk_top100_oxycline, ps_euk_oxycline)
Change the sample names in the otu tables to “Type”
# Oxycline: for each domain, keep the sample-key rows whose sample ID appears
# among the OTU-table columns, order them like those columns, and use the
# key's `Type` column as the new sample names.

# Archaea (samples missing from the archaeal data are dropped from the key)
arch_oxycline_sample_ids <- colnames(otu_table(ps_arch_top100_oxycline))
samplekey_A <- samplekey %>%
  filter(SampleID_arch %in% arch_oxycline_sample_ids) %>%
  arrange(factor(SampleID_arch, levels = arch_oxycline_sample_ids))
sample_names(ps_arch_top100_oxycline) <- samplekey_A$Type

# Bacteria
bac_oxycline_sample_ids <- colnames(otu_table(ps_bac_top100_oxycline))
samplekey_B <- samplekey %>%
  filter(SampleID_bac %in% bac_oxycline_sample_ids) %>%
  arrange(factor(SampleID_bac, levels = bac_oxycline_sample_ids))
sample_names(ps_bac_top100_oxycline) <- samplekey_B$Type

# Eukaryotes
euk_oxycline_sample_ids <- colnames(otu_table(ps_euk_top100_oxycline))
samplekey_E <- samplekey %>%
  filter(SampleID_euk %in% euk_oxycline_sample_ids) %>%
  arrange(factor(SampleID_euk, levels = euk_oxycline_sample_ids))
sample_names(ps_euk_top100_oxycline) <- samplekey_E$Type
# Anoxic: for each domain, rank taxa by taxa_sums() of the `_ra` object
# (presumably relative abundances) and prune the matching non-`_ra` object
# down to the first 100 names.
# Bacteria
bac_top100_anoxic <- ps_bac_ra_anoxic %>%
  taxa_sums() %>%
  sort(decreasing = TRUE) %>%
  `[`(1:100) %>%
  names()
ps_bac_top100_anoxic <- prune_taxa(bac_top100_anoxic, ps_bac_anoxic)
# Archaea
arch_top100_anoxic <- ps_arch_ra_anoxic %>%
  taxa_sums() %>%
  sort(decreasing = TRUE) %>%
  `[`(1:100) %>%
  names()
ps_arch_top100_anoxic <- prune_taxa(arch_top100_anoxic, ps_arch_anoxic)
# Eukarya
euk_top100_anoxic <- ps_euk_ra_anoxic %>%
  taxa_sums() %>%
  sort(decreasing = TRUE) %>%
  `[`(1:100) %>%
  names()
ps_euk_top100_anoxic <- prune_taxa(euk_top100_anoxic, ps_euk_anoxic)
Change the sample names in the otu tables to “Type”
# Relabel samples in each anoxic top-100 object with their "Type": keep the
# sample-key rows whose ID is a column of the OTU table, order them like those
# columns, then assign Type as the sample names.
# Archaea (samples missing from the archaeal table are dropped from the key)
samplekey_A <- samplekey %>%
  filter(SampleID_arch %in% colnames(otu_table(ps_arch_top100_anoxic))) %>%
  arrange(factor(SampleID_arch, levels = colnames(otu_table(ps_arch_top100_anoxic))))
sample_names(ps_arch_top100_anoxic) <- samplekey_A$Type
# Bacteria
samplekey_B <- samplekey %>%
  filter(SampleID_bac %in% colnames(otu_table(ps_bac_top100_anoxic))) %>%
  arrange(factor(SampleID_bac, levels = colnames(otu_table(ps_bac_top100_anoxic))))
sample_names(ps_bac_top100_anoxic) <- samplekey_B$Type
# Eukaryotes
samplekey_E <- samplekey %>%
  filter(SampleID_euk %in% colnames(otu_table(ps_euk_top100_anoxic))) %>%
  arrange(factor(SampleID_euk, levels = colnames(otu_table(ps_euk_top100_anoxic))))
sample_names(ps_euk_top100_anoxic) <- samplekey_E$Type
Pull out samples from euxinic regime
# Euxinic: for each domain, rank taxa by taxa_sums() of the `_ra` object
# (presumably relative abundances) and prune the matching non-`_ra` object
# down to the first 100 names.
# Bacteria
bac_top100_euxinic <- ps_bac_ra_euxinic %>%
  taxa_sums() %>%
  sort(decreasing = TRUE) %>%
  `[`(1:100) %>%
  names()
ps_bac_top100_euxinic <- prune_taxa(bac_top100_euxinic, ps_bac_euxinic)
# Archaea
arch_top100_euxinic <- ps_arch_ra_euxinic %>%
  taxa_sums() %>%
  sort(decreasing = TRUE) %>%
  `[`(1:100) %>%
  names()
ps_arch_top100_euxinic <- prune_taxa(arch_top100_euxinic, ps_arch_euxinic)
# Eukarya
euk_top100_euxinic <- ps_euk_ra_euxinic %>%
  taxa_sums() %>%
  sort(decreasing = TRUE) %>%
  `[`(1:100) %>%
  names()
ps_euk_top100_euxinic <- prune_taxa(euk_top100_euxinic, ps_euk_euxinic)
Change the sample names in the otu tables to “Type”
# Relabel samples in each euxinic top-100 object with their "Type": keep the
# sample-key rows whose ID is a column of the OTU table, order them like those
# columns, then assign Type as the sample names.
# Archaea (samples missing from the archaeal table are dropped from the key)
samplekey_A <- samplekey %>%
  filter(SampleID_arch %in% colnames(otu_table(ps_arch_top100_euxinic))) %>%
  arrange(factor(SampleID_arch, levels = colnames(otu_table(ps_arch_top100_euxinic))))
sample_names(ps_arch_top100_euxinic) <- samplekey_A$Type
# Bacteria
samplekey_B <- samplekey %>%
  filter(SampleID_bac %in% colnames(otu_table(ps_bac_top100_euxinic))) %>%
  arrange(factor(SampleID_bac, levels = colnames(otu_table(ps_bac_top100_euxinic))))
sample_names(ps_bac_top100_euxinic) <- samplekey_B$Type
# Eukaryotes
samplekey_E <- samplekey %>%
  filter(SampleID_euk %in% colnames(otu_table(ps_euk_top100_euxinic))) %>%
  arrange(factor(SampleID_euk, levels = colnames(otu_table(ps_euk_top100_euxinic))))
sample_names(ps_euk_top100_euxinic) <- samplekey_E$Type
Remove all known parasites from eukaryotic table then put back together into whole dataset
# Split the taxonomy table of `ps` into putative parasites and non-parasites.
# Parasitic lineages identified in the literature review: Syndiniales,
# Oligohymenophorea, Labyrinthulomycetes, chytrids, Cryomonadida: Protaspa,
# Apicomplexa and Perkinsida (alveolates).
# A taxon is kept as a non-parasite only if none of the patterns match.
nonparasites <- as.data.frame(tax_table(ps)) %>%
  filter(
    !grepl("Syndiniales", Class),
    !grepl("Oligohymenophorea", Class),
    !grepl("Labyrinthulomycetes", Class),
    !grepl("Chytridiomycota", Class),
    !grepl("Protaspa-lineage", Family),
    !grepl("Apicomplexa", Division),
    !grepl("Perkinsida", Class)
  )
nonparasites
# Reduced from 13,427 rows to 7,344

# Parasites are the taxa whose row names are absent from `nonparasites`
parasites <- as.data.frame(tax_table(ps)) %>%
  filter(!(row.names(.) %in% rownames(nonparasites)))
parasites
# 6083 parasites

# Make new phyloseq objects
ps_euk_nonparasites <- prune_taxa(rownames(nonparasites), ps)
ps_euk_parasites <- prune_taxa(rownames(parasites), ps)

# Retain the 100 taxa with the largest taxa_sums() from each subset
euk_top100_nonparasites <- ps_euk_nonparasites %>%
  taxa_sums() %>%
  sort(decreasing = TRUE) %>%
  `[`(1:100) %>%
  names()
ps_euk_nonparasites_top100 <- prune_taxa(euk_top100_nonparasites, ps)
euk_top100_parasites <- ps_euk_parasites %>%
  taxa_sums() %>%
  sort(decreasing = TRUE) %>%
  `[`(1:100) %>%
  names()
ps_euk_parasites_top100 <- prune_taxa(euk_top100_parasites, ps)
Export table of parasite taxonomy for supplemental table in manuscript
# Write the de-duplicated parasite taxonomy rows to disk, then echo them
parasites %>%
  distinct() %>%
  write_csv("Figures/parasites_list.csv")
parasites %>%
  distinct()
# Fraction of all taxa in `ps` that were flagged as parasites
nrow(parasites) / nrow(tax_table(ps))
[1] 0.4530424
Change the sample names in the otu tables to sample “Type”
# Non-parasite top 100: match sample-key rows to the OTU-table columns, in
# column order, and use their Type as sample names
samplekey_E <- samplekey %>%
  filter(SampleID_euk %in% colnames(otu_table(ps_euk_nonparasites_top100))) %>%
  arrange(factor(SampleID_euk, levels = colnames(otu_table(ps_euk_nonparasites_top100))))
sample_names(ps_euk_nonparasites_top100) <- samplekey_E$Type
# Parasite top 100: same procedure
samplekey_E <- samplekey %>%
  filter(SampleID_euk %in% colnames(otu_table(ps_euk_parasites_top100))) %>%
  arrange(factor(SampleID_euk, levels = colnames(otu_table(ps_euk_parasites_top100))))
sample_names(ps_euk_parasites_top100) <- samplekey_E$Type
Using top 100 OTUs/ASVs from each of the 3 domains
Remove samples from the phyloseq objects that are not in all 3 domains and reorder samples so they are in same order in all 3 objects
# Sample names shared by the bacterial, archaeal and eukaryotic objects
bac_arch_common <- intersect(
  sample_names(ps_bac_ra_pruned),
  sample_names(ps_arch_ra_pruned)
)
all_common <- intersect(bac_arch_common, sample_names(ps_euk_ra_pruned))
# Restrict each top-100 object to the shared samples
ps_bac_pruned_3domains_top100 <- prune_samples(all_common, ps_bac_top100)
ps_arch_pruned_3domains_top100 <- prune_samples(all_common, ps_arch_top100)
ps_euk_pruned_3domains_top100 <- prune_samples(all_common, ps_euk_top100)
# Put the archaeal and eukaryotic OTU-table columns in the bacterial order
otu_table(ps_arch_pruned_3domains_top100) <-
  otu_table(ps_arch_pruned_3domains_top100)[, sample_names(ps_bac_pruned_3domains_top100)]
otu_table(ps_euk_pruned_3domains_top100) <-
  otu_table(ps_euk_pruned_3domains_top100)[, sample_names(ps_bac_pruned_3domains_top100)]
sample_data(ps_bac_pruned_3domains)
Sample Data: [36 samples by 66 sample variables]:
sample_data(ps_arch_pruned_3domains)
Sample Data: [36 samples by 66 sample variables]:
sample_data(ps_euk_pruned_3domains)
Sample Data: [36 samples by 66 sample variables]:
# Run SpiecEasi on the 3-domain dataset (top 100 taxa from each domain),
# passing the seed through pulsar.params
pargs <- list(seed = 10010)
se_3domains_top100 <- spiec.easi(
  list(
    ps_bac_pruned_3domains_top100,
    ps_arch_pruned_3domains_top100,
    ps_euk_pruned_3domains_top100
  ),
  method = "glasso",
  lambda.min.ratio = 1e-2,
  nlambda = 100,
  pulsar.params = pargs
)
Applying data transformations...
Selecting model with pulsar using stars...
Remove samples from the phyloseq objects that are not in all 3 datasets. Reorder samples so they are in same order in all 3 objects
# Sample names shared by the bacterial, archaeal and eukaryotic objects
bac_arch_common <- intersect(
  sample_names(ps_bac_ra_pruned),
  sample_names(ps_arch_ra_pruned)
)
all_common <- intersect(bac_arch_common, sample_names(ps_euk_ra_pruned))
# Restrict the prokaryote and non-parasite top-100 objects to those samples
ps_bac_pruned_3domains_top100 <- prune_samples(all_common, ps_bac_top100)
ps_arch_pruned_3domains_top100 <- prune_samples(all_common, ps_arch_top100)
ps_euk_nonparasites_pruned_3domains_top100 <-
  prune_samples(all_common, ps_euk_nonparasites_top100)
# Put the archaeal and eukaryotic OTU-table columns in the bacterial order
otu_table(ps_arch_pruned_3domains_top100) <-
  otu_table(ps_arch_pruned_3domains_top100)[, sample_names(ps_bac_pruned_3domains_top100)]
otu_table(ps_euk_nonparasites_pruned_3domains_top100) <-
  otu_table(ps_euk_nonparasites_pruned_3domains_top100)[, sample_names(ps_bac_pruned_3domains_top100)]
sample_data(ps_bac_pruned_3domains_top100)
Sample Data: [36 samples by 66 sample variables]:
sample_data(ps_arch_pruned_3domains_top100)
Sample Data: [36 samples by 66 sample variables]:
sample_data(ps_euk_nonparasites_pruned_3domains_top100)
Sample Data: [36 samples by 66 sample variables]:
Run
getStability(se_3domains_top100_nonparasites)
[1] 0.04506923
Remove samples from the phyloseq objects that are not in all 3 datasets. Reorder samples so they are in same order in all 3 objects
# Sample names shared by the bacterial, archaeal and eukaryotic objects
bac_arch_common <- intersect(
  sample_names(ps_bac_ra_pruned),
  sample_names(ps_arch_ra_pruned)
)
all_common <- intersect(bac_arch_common, sample_names(ps_euk_ra_pruned))
# Restrict the prokaryote and parasite top-100 objects to those samples
ps_bac_pruned_3domains_top100 <- prune_samples(all_common, ps_bac_top100)
ps_arch_pruned_3domains_top100 <- prune_samples(all_common, ps_arch_top100)
ps_euk_parasites_pruned_3domains_top100 <-
  prune_samples(all_common, ps_euk_parasites_top100)
# Put the archaeal and eukaryotic OTU-table columns in the bacterial order
otu_table(ps_arch_pruned_3domains_top100) <-
  otu_table(ps_arch_pruned_3domains_top100)[, sample_names(ps_bac_pruned_3domains_top100)]
otu_table(ps_euk_parasites_pruned_3domains_top100) <-
  otu_table(ps_euk_parasites_pruned_3domains_top100)[, sample_names(ps_bac_pruned_3domains_top100)]
sample_data(ps_bac_pruned_3domains_top100)
Sample Data: [36 samples by 66 sample variables]:
sample_data(ps_arch_pruned_3domains_top100)
Sample Data: [36 samples by 66 sample variables]:
sample_data(ps_euk_parasites_pruned_3domains_top100)
Sample Data: [36 samples by 66 sample variables]:
Run
# Run SpiecEasi on bacteria + archaea + parasitic eukaryotes (top 100 each)
# NOTE(review): the seed here is 1001, while the other runs use 10010 --
# confirm this difference is intentional.
pargs <- list(seed = 1001)
se_3domains_top100_parasites <- spiec.easi(
  list(
    ps_bac_pruned_3domains_top100,
    ps_arch_pruned_3domains_top100,
    ps_euk_parasites_pruned_3domains_top100
  ),
  method = "glasso",
  lambda.min.ratio = 1e-2,
  nlambda = 100,
  pulsar.params = pargs
)
Applying data transformations...
Selecting model with pulsar using stars...
Fitting final estimate with glasso...
done
getStability(se_3domains_top100_parasites)
[1] 0.04376254
Prokaryotes only: top 150 from each domain, to keep the size of the network consistent
Remove samples from the phyloseq objects that are not in both domains and reorder samples so they are in the same order in both objects
# Sample names shared by the bacterial and archaeal objects
bac_arch_common <- intersect(
  sample_names(ps_bac_ra_pruned),
  sample_names(ps_arch_ra_pruned)
)
# Restrict each top-150 object to the shared samples
ps_bac_pruned_2domains_top150 <- prune_samples(bac_arch_common, ps_bac_top150)
ps_arch_pruned_2domains_top150 <- prune_samples(bac_arch_common, ps_arch_top150)
# Put the archaeal OTU-table columns in the bacterial sample order
otu_table(ps_arch_pruned_2domains_top150) <-
  otu_table(ps_arch_pruned_2domains_top150)[, sample_names(ps_bac_pruned_2domains_top150)]
sample_data(ps_bac_pruned_2domains_top150)
Sample Data: [36 samples by 66 sample variables]:
sample_data(ps_arch_pruned_2domains_top150)
Sample Data: [36 samples by 66 sample variables]:
Run
# Run SpiecEasi while controlling for dataset size:
# the 150-taxon subsets of bacteria and archaea give 300 members in total,
# the same size as the full-dataset network.
pargs <- list(seed = 10010)
se_2domains_top150 <- spiec.easi(
  list(
    ps_bac_pruned_2domains_top150,
    ps_arch_pruned_2domains_top150
  ),
  method = "glasso",
  lambda.min.ratio = 1e-2,
  nlambda = 200,
  pulsar.params = pargs
)
getStability(se_2domains_top150)
# Oxycline: sample names shared by all three domains
bac_arch_common <- intersect(
  sample_names(ps_bac_oxycline_pruned),
  sample_names(ps_arch_oxycline_pruned)
)
all_common <- intersect(bac_arch_common, sample_names(ps_euk_oxycline_pruned))
# Restrict each oxycline top-100 object to the shared samples
ps_bac_pruned_3domains_top100_oxycline <-
  prune_samples(all_common, ps_bac_top100_oxycline)
ps_arch_pruned_3domains_top100_oxycline <-
  prune_samples(all_common, ps_arch_top100_oxycline)
ps_euk_pruned_3domains_top100_oxycline <-
  prune_samples(all_common, ps_euk_top100_oxycline)
# Put the archaeal and eukaryotic OTU-table columns in the bacterial order
otu_table(ps_arch_pruned_3domains_top100_oxycline) <-
  otu_table(ps_arch_pruned_3domains_top100_oxycline)[, sample_names(ps_bac_pruned_3domains_top100_oxycline)]
otu_table(ps_euk_pruned_3domains_top100_oxycline) <-
  otu_table(ps_euk_pruned_3domains_top100_oxycline)[, sample_names(ps_bac_pruned_3domains_top100_oxycline)]
sample_data(ps_bac_pruned_3domains_top100_oxycline)
Sample Data: [21 samples by 66 sample variables]:
sample_data(ps_arch_pruned_3domains_top100_oxycline)
Sample Data: [21 samples by 66 sample variables]:
sample_data(ps_euk_pruned_3domains_top100_oxycline)
Sample Data: [21 samples by 66 sample variables]:
Run
# Run SpiecEasi while controlling for dataset size:
# the 100-taxon subsets of bacteria, archaea and eukaryotes from the oxycline
# give 300 members in total, the same size as the full-dataset network.
pargs <- list(seed = 10010)
se_3domains_top100_oxycline <- spiec.easi(
  list(
    ps_bac_pruned_3domains_top100_oxycline,
    ps_arch_pruned_3domains_top100_oxycline,
    ps_euk_pruned_3domains_top100_oxycline
  ),
  method = "glasso",
  lambda.min.ratio = 1e-2,
  nlambda = 200,
  pulsar.params = pargs
)
getStability(se_3domains_top100_oxycline)
# Anoxic: sample names shared by all three domains
bac_arch_common <- intersect(
  sample_names(ps_bac_anoxic_pruned),
  sample_names(ps_arch_anoxic_pruned)
)
all_common <- intersect(bac_arch_common, sample_names(ps_euk_anoxic_pruned))
# Restrict each anoxic top-100 object to the shared samples
ps_bac_pruned_3domains_top100_anoxic <-
  prune_samples(all_common, ps_bac_top100_anoxic)
ps_arch_pruned_3domains_top100_anoxic <-
  prune_samples(all_common, ps_arch_top100_anoxic)
ps_euk_pruned_3domains_top100_anoxic <-
  prune_samples(all_common, ps_euk_top100_anoxic)
# Put the archaeal and eukaryotic OTU-table columns in the bacterial order
otu_table(ps_arch_pruned_3domains_top100_anoxic) <-
  otu_table(ps_arch_pruned_3domains_top100_anoxic)[, sample_names(ps_bac_pruned_3domains_top100_anoxic)]
otu_table(ps_euk_pruned_3domains_top100_anoxic) <-
  otu_table(ps_euk_pruned_3domains_top100_anoxic)[, sample_names(ps_bac_pruned_3domains_top100_anoxic)]
sample_data(ps_bac_pruned_3domains_top100_anoxic)
Sample Data: [11 samples by 66 sample variables]:
sample_data(ps_arch_pruned_3domains_top100_anoxic)
Sample Data: [11 samples by 66 sample variables]:
sample_data(ps_euk_pruned_3domains_top100_anoxic)
Sample Data: [11 samples by 66 sample variables]:
# Run SpiecEasi while controlling for dataset size:
# the 100-taxon subsets of bacteria, archaea and eukaryotes from the anoxic
# regime give 300 members in total, the same size as the full-dataset network.
pargs <- list(seed = 10010)
se_3domains_top100_anoxic <- spiec.easi(
  list(
    ps_bac_pruned_3domains_top100_anoxic,
    ps_arch_pruned_3domains_top100_anoxic,
    ps_euk_pruned_3domains_top100_anoxic
  ),
  method = "glasso",
  lambda.min.ratio = 1e-2,
  nlambda = 250,
  pulsar.params = pargs
)
getStability(se_3domains_top100_anoxic)
# Euxinic: sample names shared by all three domains
bac_arch_common <- intersect(
  sample_names(ps_bac_euxinic_pruned),
  sample_names(ps_arch_euxinic_pruned)
)
all_common <- intersect(bac_arch_common, sample_names(ps_euk_euxinic_pruned))
# Restrict each euxinic top-100 object to the shared samples
ps_bac_pruned_3domains_top100_euxinic <-
  prune_samples(all_common, ps_bac_top100_euxinic)
ps_arch_pruned_3domains_top100_euxinic <-
  prune_samples(all_common, ps_arch_top100_euxinic)
ps_euk_pruned_3domains_top100_euxinic <-
  prune_samples(all_common, ps_euk_top100_euxinic)
# Put the archaeal and eukaryotic OTU-table columns in the bacterial order
otu_table(ps_arch_pruned_3domains_top100_euxinic) <-
  otu_table(ps_arch_pruned_3domains_top100_euxinic)[, sample_names(ps_bac_pruned_3domains_top100_euxinic)]
otu_table(ps_euk_pruned_3domains_top100_euxinic) <-
  otu_table(ps_euk_pruned_3domains_top100_euxinic)[, sample_names(ps_bac_pruned_3domains_top100_euxinic)]
sample_data(ps_bac_pruned_3domains_top100_euxinic)
Sample Data: [4 samples by 66 sample variables]:
sample_data(ps_arch_pruned_3domains_top100_euxinic)
Sample Data: [4 samples by 66 sample variables]:
sample_data(ps_euk_pruned_3domains_top100_euxinic)
Sample Data: [4 samples by 66 sample variables]:
Run
# Run SpiecEasi while controlling for dataset size:
# the 100-taxon subsets of bacteria, archaea and eukaryotes from the euxinic
# regime give 300 members in total, the same size as the full-dataset network.
# This run uses a larger lambda.min.ratio (1e-1) than the other regimes.
pargs <- list(seed = 10010)
se_3domains_top100_euxinic <- spiec.easi(
  list(
    ps_bac_pruned_3domains_top100_euxinic,
    ps_arch_pruned_3domains_top100_euxinic,
    ps_euk_pruned_3domains_top100_euxinic
  ),
  method = "glasso",
  lambda.min.ratio = 1e-1,
  nlambda = 100,
  pulsar.params = pargs
)
getStability(se_3domains_top100_euxinic)
–> No matter the parameters, I still can’t get this to work. The euxinic subset (4 samples) is too small.
Need to save each network object to its own file because they’re too big to save together-
# Write each SpiecEasi result to its own numbered .RData backup
save(
  se_3domains_top100,
  file = "EnvironmentBackups/CariacoEuks_revisions2_additionalvars_1.RData"
)
save(
  se_2domains_top150,
  file = "EnvironmentBackups/CariacoEuks_revisions2_additionalvars_2.RData"
)
save(
  se_3domains_top100_oxycline,
  file = "EnvironmentBackups/CariacoEuks_revisions2_additionalvars_3.RData"
)
save(
  se_3domains_top100_anoxic,
  file = "EnvironmentBackups/CariacoEuks_revisions2_additionalvars_4.RData"
)
save(
  se_3domains_top100_nonparasites,
  file = "EnvironmentBackups/CariacoEuks_revisions2_additionalvars_5.RData"
)
save(
  se_3domains_top100_parasites,
  file = "EnvironmentBackups/CariacoEuks_revisions2_additionalvars_6.RData"
)
# These objects are very large; optionally remove them here and load them
# back one at a time while working:
# rm(se_3domains_top100, se_2domains_top150, se_3domains_top100_oxycline, se_3domains_top100_anoxic, se_3domains_top100_nonparasites, se_3domains_top100_parasites)
First extract ASV names from original phyloseq objects in order to retain for network nodes. Make sure these are in same order in which they went in SpiecEasi
# Restore the 3-domain network from its backup
load("EnvironmentBackups/CariacoEuks_revisions2_additionalvars_1.RData")
# Stack the OTU tables in the order the domains were given to spiec.easi
# (bacteria, archaea, eukaryotes); the row names supply the node labels
top300_df <- bind_rows(
  data.frame(otu_table(ps_bac_top100)),
  data.frame(otu_table(ps_arch_top100)),
  data.frame(otu_table(ps_euk_top100))
)
top300_df
# Adjacency matrix of the refit network, with a tally of its entries
adj.mat <- getRefit(se_3domains_top100)
table(as.numeric(adj.mat))
0 1
85064 4936
# Weighted adjacency: the optimal covariance converted to correlations,
# multiplied elementwise by the refit adjacency matrix
se.cor <- getOptCov(se_3domains_top100) %>%
  as.matrix() %>%
  cov2cor()
weighted.adj.mat <- se.cor * getRefit(se_3domains_top100)
# Build unweighted and weighted graph objects
grph.unweighted.top300 <- adj2igraph(adj.mat)
grph.top300 <- adj2igraph(weighted.adj.mat)
# Label vertices with the taxon names
V(grph.top300)$name <- rownames(top300_df)
# V(grph.top300)
# Node size is (degree + 1) / 2; the +1 avoids size-zero vertices and the
# halving keeps the nodes small
V(grph.top300)$size <- (degree(grph.top300) + 1) / 2
# Edge width is proportional to the absolute edge weight
E(grph.top300)$width <- 10 * abs(E(grph.top300)$weight)
# Convert the graph to vertex and edge data frames so taxonomy can be joined
grph.top300_df <- igraph::as_data_frame(grph.top300, what = "both")
# Per-domain taxonomy tables with the taxon name as an explicit `name` column
bac_top100_tax_table <- ps_bac_pruned_3domains_top100 %>%
  tax_table() %>%
  as.data.frame() %>%
  mutate(name = rownames(.))
arch_top100_tax_table <- ps_arch_pruned_3domains_top100 %>%
  tax_table() %>%
  as.data.frame() %>%
  mutate(name = rownames(.))
euk_top100_tax_table <- ps_euk_pruned_3domains_top100 %>%
  tax_table() %>%
  as.data.frame() %>%
  mutate(name = rownames(.))
# Link the graph's vertex data frame to the formatted taxonomy tables.
# Vertices are ordered bacteria, then archaea, then eukaryotes (the order the
# domains were given to spiec.easi), so each domain's rows start right after
# the taxa of the domains that precede it. The offsets therefore use the
# preceding domains' taxon counts, which stays correct even if the domains do
# not all contribute the same number of taxa.
bac_temp <- left_join(
  grph.top300_df$vertices[seq_len(ntaxa(ps_bac_pruned_3domains_top100)), ],
  bac_top100_tax_table,
  by = "name"
)
# delete columns that don't match other tax tables
bac_temp <- select(bac_temp, -"taxonomy-9", -"Refined taxonomy")
arch_temp <- left_join(
  grph.top300_df$vertices[
    ntaxa(ps_bac_pruned_3domains_top100) +
      seq_len(ntaxa(ps_arch_pruned_3domains_top100)),
  ],
  arch_top100_tax_table,
  by = "name"
)
euk_temp <- left_join(
  grph.top300_df$vertices[
    ntaxa(ps_bac_pruned_3domains_top100) +
      ntaxa(ps_arch_pruned_3domains_top100) +
      seq_len(ntaxa(ps_euk_pruned_3domains_top100)),
  ],
  euk_top100_tax_table,
  by = "name"
)
# Give the eukaryote taxonomy columns the same names as the prokaryote tables
euk_temp <- euk_temp %>%
  rename(
    "taxonomy-1" = Kingdom,
    "taxonomy-2" = Supergroup,
    "taxonomy-3" = Division,
    "taxonomy-4" = Class,
    "taxonomy-5" = Order,
    "taxonomy-6" = Family,
    "taxonomy-7" = Genus,
    "taxonomy-8" = Species
  )
# Stack the three domains into one vertex table
all_temp <- rbind(bac_temp, arch_temp, euk_temp)
# Rebuild the undirected graph with the taxonomy attached to each vertex
grph.top300 <- graph_from_data_frame(
  grph.top300_df$edges,
  directed = FALSE,
  vertices = all_temp
)
# Colour palette for the domains
dtype <- c("red", "green", "blue", "grey")
# One colour per vertex, indexed by the factor code of its "taxonomy-1" value
# NOTE(review): the legend assumes the factor levels sort as Archaea,
# Bacteria, Eukarya, then the no-hit label -- confirm.
domain_color_top300 <- dtype[
  as.numeric(as.factor(V(grph.top300)$`taxonomy-1`))
]
# Plot on the active device
plot(
  grph.top300,
  layout = layout_with_graphopt(grph.top300),
  vertex.color = domain_color_top300,
  vertex.label = NA
)
title("SpiecEasi Network: All domains")
legend(
  "topright",
  legend = c("Archaea", "Bacteria", "Eukarya", "No blast hit"),
  fill = c("red", "green", "blue", "grey"),
  border = NA,
  bty = "n"
)
# Save the same figure as EPS (the layout is recomputed for the saved copy)
setEPS()
postscript(
  file = "Figures/3domains_top300_spieceasi_network.eps",
  width = 5.5,
  height = 5
)
plot(
  grph.top300,
  layout = layout_with_graphopt(grph.top300),
  vertex.color = domain_color_top300,
  vertex.label = NA
)
title("SpiecEasi Network: All domains")
legend(
  "topright",
  legend = c("Archaea", "Bacteria", "Eukarya", "No blast hit"),
  fill = c("red", "green", "blue", "grey"),
  border = NA,
  bty = "n"
)
dev.off()
quartz_off_screen
2
# Split the network by edge sign: drop negative-weight edges for the positive
# graph and positive-weight edges for the negative graph
grph.top300.pos <- delete.edges(
  grph.top300,
  which(E(grph.top300)$weight < 0)
)
grph.top300.neg <- delete.edges(
  grph.top300,
  which(E(grph.top300)$weight > 0)
)
# Drop vertices left without any edge in each subgraph
grph.top300.pos <- delete.vertices(
  grph.top300.pos,
  which(degree(grph.top300.pos) == 0)
)
grph.top300.neg <- delete.vertices(
  grph.top300.neg,
  which(degree(grph.top300.neg) == 0)
)
# Vertex colours for each subgraph, indexed by the "taxonomy-1" factor code
domain_color_top300_pos <- dtype[
  as.numeric(as.factor(V(grph.top300.pos)$`taxonomy-1`))
]
domain_color_top300_neg <- dtype[
  as.numeric(as.factor(V(grph.top300.neg)$`taxonomy-1`))
]
# Plot the positive-edge network
plot(
  grph.top300.pos,
  layout = layout_with_graphopt(grph.top300.pos),
  vertex.color = domain_color_top300_pos,
  vertex.label = NA
)
title("SpiecEasi Network: All domains, Positive Edges only")
legend(
  "topright",
  legend = c("Archaea", "Bacteria", "Eukarya", "No blast hit"),
  fill = c("red", "green", "blue", "grey"),
  border = NA,
  bty = "n"
)
# Plot the negative-edge network
plot(
  grph.top300.neg,
  layout = layout_with_graphopt(grph.top300.neg),
  vertex.color = domain_color_top300_neg,
  vertex.label = NA
)
title("SpiecEasi Network: All domains, Negative Edges Only")
legend(
  "topright",
  legend = c("Archaea", "Bacteria", "Eukarya", "No blast hit"),
  fill = c("red", "green", "blue", "grey"),
  border = NA,
  bty = "n"
)
# Save the positive-edge network of the full 3-domain dataset as EPS.
# The title no longer says "Oxycline": this figure is drawn from
# grph.top300.pos, the full-dataset network, not the oxycline subset.
setEPS()
postscript(file = "Figures/3domains_top300_posedges_spieceasi_network.eps", width = 5.5, height = 5)
plot(grph.top300.pos,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.top300.pos),
  vertex.color = domain_color_top300_pos)
title("SpiecEasi Network: All domains, Positive Edges Only")
legend("topright", bty = "n",
  legend = c("Archaea", "Bacteria", "Eukarya", "No blast hit"),
  fill = c("red", "green", "blue", "grey"), border = NA)
dev.off()
quartz_off_screen
2
# Save the negative-edge network of the full 3-domain dataset as EPS.
# Vertex colours must come from domain_color_top300_neg, the vector built
# from grph.top300.neg; the oxycline colour vector belongs to a different
# graph and does not line up with these vertices.
setEPS()
postscript(file = "Figures/3domains_top300_negedges_spieceasi_network.eps", width = 5.5, height = 5)
plot(grph.top300.neg,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.top300.neg),
  vertex.color = domain_color_top300_neg)
title("SpiecEasi Network: All domains, Negative Edges Only")
legend("topright", bty = "n",
  legend = c("Archaea", "Bacteria", "Eukarya", "No blast hit"),
  fill = c("red", "green", "blue", "grey"), border = NA)
dev.off()
quartz_off_screen
2
First extract ASV names from original phyloseq objects in order to retain for network nodes. Make sure these are in same order in which they went in SpiecEasi
# Free the 3-domain network and restore the 2-domain one from its backup
rm(se_3domains_top100)
load("EnvironmentBackups/CariacoEuks_revisions2_additionalvars_2.RData")
# Stack the OTU tables in the order the domains were given to spiec.easi
# (bacteria, archaea); the row names supply the node labels
top300_2domains_df <- bind_rows(
  data.frame(otu_table(ps_bac_top150)),
  data.frame(otu_table(ps_arch_top150))
)
top300_2domains_df
# Adjacency matrix of the refit network, with a tally of its entries
adj.mat <- getRefit(se_2domains_top150)
table(as.numeric(adj.mat))
0 1
83100 6900
# Weighted adjacency: the optimal covariance converted to correlations,
# multiplied elementwise by the refit adjacency matrix
se.cor <- getOptCov(se_2domains_top150) %>%
  as.matrix() %>%
  cov2cor()
weighted.adj.mat <- se.cor * getRefit(se_2domains_top150)
# Build unweighted and weighted graph objects
# (grph.unweighted.top300 is overwritten here with the 2-domain graph)
grph.unweighted.top300 <- adj2igraph(adj.mat)
grph.2domains.top300 <- adj2igraph(weighted.adj.mat)
# Label vertices with the taxon names
V(grph.2domains.top300)$name <- rownames(top300_2domains_df)
# Node size is (degree + 1) / 2; the +1 avoids size-zero vertices and the
# halving keeps the nodes small
V(grph.2domains.top300)$size <- (degree(grph.2domains.top300) + 1) / 2
# Edge width is proportional to the absolute edge weight
E(grph.2domains.top300)$width <- 10 * abs(E(grph.2domains.top300)$weight)
# Convert the graph to vertex and edge data frames so taxonomy can be joined
# (this reuses the name top300_2domains_df, replacing the stacked OTU table)
top300_2domains_df <- igraph::as_data_frame(grph.2domains.top300, what = "both")
# Per-domain taxonomy tables with the taxon name as an explicit `name` column
bac_top150_tax_table <- ps_bac_pruned_2domains_top150 %>%
  tax_table() %>%
  as.data.frame() %>%
  mutate(name = rownames(.))
arch_top150_tax_table <- ps_arch_pruned_2domains_top150 %>%
  tax_table() %>%
  as.data.frame() %>%
  mutate(name = rownames(.))
# Link the graph's vertex data frame to the formatted taxonomy tables.
# Vertices are ordered bacteria, then archaea (the order the domains were
# given to spiec.easi), so the archaeal rows start right after the bacterial
# taxa. The offset therefore uses the bacterial taxon count, which stays
# correct even if the two domains contribute different numbers of taxa.
bac_temp <- left_join(
  top300_2domains_df$vertices[seq_len(ntaxa(ps_bac_pruned_2domains_top150)), ],
  bac_top150_tax_table,
  by = "name"
)
# delete columns that don't match other tax tables
bac_temp <- select(bac_temp, -"taxonomy-9", -"Refined taxonomy")
arch_temp <- left_join(
  top300_2domains_df$vertices[
    ntaxa(ps_bac_pruned_2domains_top150) +
      seq_len(ntaxa(ps_arch_pruned_2domains_top150)),
  ],
  arch_top150_tax_table,
  by = "name"
)
# Stack both domains into one vertex table
all_temp <- rbind(bac_temp, arch_temp)
# Rebuild the undirected graph with the taxonomy attached to each vertex
grph.2domains.top300 <- graph_from_data_frame(
  top300_2domains_df$edges,
  directed = FALSE,
  vertices = all_temp
)
# Colour palette for the two-domain network
dtype <- c("red", "green", "grey")
# One colour per vertex, indexed by the factor code of its "taxonomy-1" value
# NOTE(review): the legend assumes the factor levels sort as Archaea,
# Bacteria, then the no-hit label -- confirm.
domain_color_2domains_top300 <- dtype[
  as.numeric(as.factor(V(grph.2domains.top300)$`taxonomy-1`))
]
# Plot on the active device
plot(
  grph.2domains.top300,
  layout = layout_with_graphopt(grph.2domains.top300),
  vertex.color = domain_color_2domains_top300,
  vertex.label = NA
)
title("SpiecEasi Network: Two domains")
legend(
  "topright",
  legend = c("Archaea", "Bacteria", "No Blast Hit"),
  fill = c("red", "green", "grey"),
  border = NA,
  bty = "n"
)
# Save the same figure as EPS (the layout is recomputed for the saved copy)
setEPS()
postscript(
  file = "Figures/2domains_top300_spieceasi_network.eps",
  width = 5.5,
  height = 5
)
plot(
  grph.2domains.top300,
  layout = layout_with_graphopt(grph.2domains.top300),
  vertex.color = domain_color_2domains_top300,
  vertex.label = NA
)
title("SpiecEasi Network: Two domains")
legend(
  "topright",
  legend = c("Archaea", "Bacteria", "No Blast Hit"),
  fill = c("red", "green", "grey"),
  border = NA,
  bty = "n"
)
dev.off()
quartz_off_screen
2
# Split the network by edge sign: drop negative-weight edges for the positive
# graph and positive-weight edges for the negative graph
grph.top300.2domains.pos <- delete.edges(
  grph.2domains.top300,
  which(E(grph.2domains.top300)$weight < 0)
)
grph.top300.2domains.neg <- delete.edges(
  grph.2domains.top300,
  which(E(grph.2domains.top300)$weight > 0)
)
# Drop vertices left without any edge in each subgraph
grph.top300.2domains.pos <- delete.vertices(
  grph.top300.2domains.pos,
  which(degree(grph.top300.2domains.pos) == 0)
)
grph.top300.2domains.neg <- delete.vertices(
  grph.top300.2domains.neg,
  which(degree(grph.top300.2domains.neg) == 0)
)
# Vertex colours for each subgraph, using the three-colour palette
dtype <- c("red", "green", "grey") # temporarily change for this plot
domain_color_2domains_top300_pos <- dtype[
  as.numeric(as.factor(V(grph.top300.2domains.pos)$`taxonomy-1`))
]
domain_color_2domains_top300_neg <- dtype[
  as.numeric(as.factor(V(grph.top300.2domains.neg)$`taxonomy-1`))
]
dtype <- c("red", "green", "blue", "grey") # change back
# Plot the positive-edge network
plot(
  grph.top300.2domains.pos,
  layout = layout_with_graphopt(grph.top300.2domains.pos),
  vertex.color = domain_color_2domains_top300_pos,
  vertex.label = NA
)
title("SpiecEasi Network: Two domains, Positive Edges only")
legend(
  "topright",
  legend = c("Archaea", "Bacteria", "No Blast Hit"),
  fill = c("red", "green", "grey"),
  border = NA,
  bty = "n"
)
# Plot the negative-edge network
plot(
  grph.top300.2domains.neg,
  layout = layout_with_graphopt(grph.top300.2domains.neg),
  vertex.color = domain_color_2domains_top300_neg,
  vertex.label = NA
)
title("SpiecEasi Network: Two domains, Negative Edges Only")
legend(
  "topright",
  legend = c("Archaea", "Bacteria", "No Blast Hit"),
  fill = c("red", "green", "grey"),
  border = NA,
  bty = "n"
)
# Save the positive-edge network of the two-domain dataset as EPS.
# The title no longer says "Oxycline": this figure is drawn from
# grph.top300.2domains.pos, the full two-domain network, not the oxycline
# subset.
setEPS()
postscript(file = "Figures/2domains_top300_posedges_spieceasi_network.eps", width = 5.5, height = 5)
plot(grph.top300.2domains.pos,
  vertex.label = NA,
  layout = layout_with_graphopt(grph.top300.2domains.pos),
  vertex.color = domain_color_2domains_top300_pos)
title("SpiecEasi Network: Two domains, Positive Edges Only")
legend("topright", bty = "n",
  legend = c("Archaea", "Bacteria", "No Blast Hit"),
  fill = c("red", "green", "grey"), border = NA)
dev.off()
quartz_off_screen
2
# Save the negative-edge network of the two-domain dataset as EPS
setEPS()
postscript(
  file = "Figures/2domains_top300_negedges_spieceasi_network.eps",
  width = 5.5,
  height = 5
)
plot(
  grph.top300.2domains.neg,
  layout = layout_with_graphopt(grph.top300.2domains.neg),
  vertex.color = domain_color_2domains_top300_neg,
  vertex.label = NA
)
title("SpiecEasi Network: Two domains, Negative Edges Only")
legend(
  "topright",
  legend = c("Archaea", "Bacteria", "No Blast Hit"),
  fill = c("red", "green", "grey"),
  border = NA,
  bty = "n"
)
dev.off()
quartz_off_screen
2
First extract ASV names from original phyloseq objects in order to retain for network nodes. Make sure these are in same order in which they went in SpiecEasi
# Free the 2-domain network and restore the oxycline one from its backup
rm(se_2domains_top150)
load("EnvironmentBackups/CariacoEuks_revisions2_additionalvars_3.RData")
# Stack the OTU tables in the order the domains were given to spiec.easi
# (bacteria, archaea, eukaryotes); the row names supply the node labels
top300_oxycline_df <- bind_rows(
  data.frame(otu_table(ps_bac_pruned_3domains_top100_oxycline)),
  data.frame(otu_table(ps_arch_pruned_3domains_top100_oxycline)),
  data.frame(otu_table(ps_euk_pruned_3domains_top100_oxycline))
)
top300_oxycline_df
# Adjacency matrix of the refit network, with a tally of its entries
adj.mat <- getRefit(se_3domains_top100_oxycline)
table(as.numeric(adj.mat))
0 1
86930 3070
# Weighted adjacency: the optimal covariance converted to correlations,
# multiplied elementwise by the refit adjacency matrix
se.cor <- getOptCov(se_3domains_top100_oxycline) %>%
  as.matrix() %>%
  cov2cor()
weighted.adj.mat <- se.cor * getRefit(se_3domains_top100_oxycline)
# Build unweighted and weighted graph objects
grph.unweighted.top300.oxycline <- adj2igraph(adj.mat)
grph.top300.oxycline <- adj2igraph(weighted.adj.mat)
# Label vertices with the taxon names
V(grph.top300.oxycline)$name <- rownames(top300_oxycline_df)
# Node size is (degree + 1) / 2; the +1 avoids size-zero vertices and the
# halving keeps the nodes small
V(grph.top300.oxycline)$size <- (degree(grph.top300.oxycline) + 1) / 2
# Edge width is proportional to the absolute edge weight
E(grph.top300.oxycline)$width <- 10 * abs(E(grph.top300.oxycline)$weight)
# Convert the graph to vertex and edge data frames so taxonomy can be joined
grph.top300_oxycline_df <- igraph::as_data_frame(grph.top300.oxycline, what = "both")
# Per-domain taxonomy tables with the taxon name as an explicit `name` column
# (these reuse the *_top100_tax_table names from the full-dataset section)
bac_top100_tax_table <- ps_bac_pruned_3domains_top100_oxycline %>%
  tax_table() %>%
  as.data.frame() %>%
  mutate(name = rownames(.))
arch_top100_tax_table <- ps_arch_pruned_3domains_top100_oxycline %>%
  tax_table() %>%
  as.data.frame() %>%
  mutate(name = rownames(.))
euk_top100_tax_table <- ps_euk_pruned_3domains_top100_oxycline %>%
  tax_table() %>%
  as.data.frame() %>%
  mutate(name = rownames(.))
#link graph data frame to formatted taxonomy tables
bac_temp <- left_join(grph.top300_oxycline_df$vertices[1:ntaxa(ps_bac_pruned_3domains_top100_oxycline),],
bac_top100_tax_table, by = "name")
# delete columns that don't match other tax tables
bac_temp <- select(bac_temp, -"taxonomy-9", -"Refined taxonomy")
arch_temp <- left_join(grph.top300_oxycline_df$vertices[ntaxa(ps_arch_pruned_3domains_top100_oxycline)+1:ntaxa(ps_arch_pruned_3domains_top100_oxycline),],arch_top100_tax_table, by = "name")
euk_temp <- left_join(grph.top300_oxycline_df$vertices[ntaxa(ps_euk_pruned_3domains_top100_oxycline)+ntaxa(ps_euk_pruned_3domains_top100_oxycline)+1:ntaxa(ps_euk_pruned_3domains_top100_oxycline),], euk_top100_tax_table, by = "name")
# rename column names in euk table to match others
euk_temp <- euk_temp %>%
rename("taxonomy-1" = Kingdom, "taxonomy-2" = Supergroup, "taxonomy-3" = Division, "taxonomy-4" = Class, "taxonomy-5" = Order, "taxonomy-6" = Family, "taxonomy-7" = Genus, "taxonomy-8" = Species)
# build full dataframe with all 3 domains
all_temp <- rbind(bac_temp, arch_temp, euk_temp)
# remake into graph
grph.top300.oxycline <- graph_from_data_frame(grph.top300_oxycline_df$edges,
directed = F,
vertices = all_temp)
# Colour palette for the domains
dtype <- c("red", "green", "blue", "grey")
# One colour per node, picked by the factor code of its "taxonomy-1" value
domain_color_top300_oxycline <- dtype[
  as.numeric(as.factor(V(grph.top300.oxycline)$`taxonomy-1`))
]
# Draws the oxycline network with its title and legend on the active device
draw_oxycline_network <- function() {
  plot(
    grph.top300.oxycline,
    vertex.label = NA,
    layout = layout_with_graphopt(grph.top300.oxycline),
    vertex.color = domain_color_top300_oxycline
  )
  title("SpiecEasi Network: All domains, Oxycline")
  legend(
    "topright",
    bty = "n",
    legend = c("Archaea", "Bacteria", "Eukarya", "No Blast Hit"),
    fill = c("red", "green", "blue", "grey"),
    border = NA
  )
}
# Plot
draw_oxycline_network()
# Save the same plot as EPS
setEPS()
postscript(file = "Figures/3domains_top300_oxycline_spieceasi_network.eps", width = 5.5, height = 5)
draw_oxycline_network()
dev.off()
quartz_off_screen
2
# Subset based on pos or neg edges: the positive graph drops every edge with
# weight < 0, the negative graph drops every edge with weight > 0.
grph.top300.oxycline.pos <- delete_edges(grph.top300.oxycline, which(E(grph.top300.oxycline)$weight < 0))
grph.top300.oxycline.neg <- delete_edges(grph.top300.oxycline, which(E(grph.top300.oxycline)$weight > 0))
# For each subsetted graph, remove the nodes that are no longer connected to anything
grph.top300.oxycline.pos <- delete_vertices(grph.top300.oxycline.pos, which(degree(grph.top300.oxycline.pos) == 0))
grph.top300.oxycline.neg <- delete_vertices(grph.top300.oxycline.neg, which(degree(grph.top300.oxycline.neg) == 0))
# Colour vector for each subgraph. The factor levels are taken from the full
# network so that a given "taxonomy-1" value keeps the same colour (and stays
# in step with the legend) even if one of the values is absent from a subgraph.
domain_levels <- levels(as.factor(V(grph.top300.oxycline)$"taxonomy-1"))
domain_color_top300_oxycline_pos <- dtype[as.numeric(factor(V(grph.top300.oxycline.pos)$"taxonomy-1", levels = domain_levels))]
domain_color_top300_oxycline_neg <- dtype[as.numeric(factor(V(grph.top300.oxycline.neg)$"taxonomy-1", levels = domain_levels))]
# Draws one subnetwork with the given node colours, title and the domain legend
draw_oxycline_subnetwork <- function(grph, node_colors, main) {
  plot(grph,
       vertex.label = NA,
       layout = layout_with_graphopt(grph),
       vertex.color = node_colors)
  title(main)
  legend("topright", bty = "n",
         legend = c("Archaea", "Bacteria", "Eukarya", "No Blast Hit"),
         fill = c("red", "green", "blue", "grey"), border = NA)
}
# Plot pos
draw_oxycline_subnetwork(grph.top300.oxycline.pos, domain_color_top300_oxycline_pos,
                         "SpiecEasi Network: All domains, Positive Edges only, oxycline")
# Plot neg
draw_oxycline_subnetwork(grph.top300.oxycline.neg, domain_color_top300_oxycline_neg,
                         "SpiecEasi Network: All domains, Negative Edges Only, oxycline")
# Save the positive-edge plot as EPS
setEPS()
postscript(file = "Figures/3domains_top300_posedges_oxycline_spieceasi_network.eps", width = 5.5, height = 5)
draw_oxycline_subnetwork(grph.top300.oxycline.pos, domain_color_top300_oxycline_pos,
                         "SpiecEasi Network: All domains, Positive Edges Only, oxycline")
dev.off()
quartz_off_screen
2
# Write the negative-edge oxycline network to an EPS file
setEPS()
postscript(
  file = "Figures/3domains_top300_negedges_oxycline_spieceasi_network.eps",
  width = 5.5,
  height = 5
)
plot(
  grph.top300.oxycline.neg,
  layout = layout_with_graphopt(grph.top300.oxycline.neg),
  vertex.color = domain_color_top300_oxycline_neg,
  vertex.label = NA
)
title(main = "SpiecEasi Network: All domains, Negative Edges Only, oxycline")
legend(
  x = "topright",
  legend = c("Archaea", "Bacteria", "Eukarya", "No Blast Hit"),
  fill = c("red", "green", "blue", "grey"),
  border = NA,
  bty = "n"
)
dev.off()
quartz_off_screen
2
First, extract the ASV names from the original phyloseq objects so they can be retained as network node names. Make sure they are in the same order in which they went into SpiecEasi.
# Drop the oxycline SpiecEasi object from the workspace, then load the saved
# workspace used for the anoxic network.
rm(se_3domains_top100_oxycline)
load("EnvironmentBackups/CariacoEuks_revisions2_additionalvars_4.RData")
# Stack the OTU tables of the three domains (bacteria, archaea, eukaryotes, in
# that order); the row names of the stacked table are used as node names later.
top300_anoxic_df <- bind_rows(
  lapply(
    list(
      ps_bac_pruned_3domains_top100_anoxic,
      ps_arch_pruned_3domains_top100_anoxic,
      ps_euk_pruned_3domains_top100_anoxic
    ),
    function(ps) data.frame(otu_table(ps))
  )
)
top300_anoxic_df
# Refit adjacency matrix from the SpiecEasi output
adj.mat <- getRefit(se_3domains_top100_anoxic)
# Tally the 0 and 1 entries of the adjacency matrix
table(as.numeric(adj.mat))
0 1
88466 1534
# Weighted adjacency: the optimal covariance matrix is converted to
# correlations and multiplied element-wise by the refit adjacency matrix.
se.cor <- se_3domains_top100_anoxic %>%
  getOptCov() %>%
  as.matrix() %>%
  cov2cor()
weighted.adj.mat <- se.cor * getRefit(se_3domains_top100_anoxic)
# Graph objects: one from the 0/1 adjacency, one from the weighted adjacency
grph.unweighted.top300.anoxic <- adj2igraph(adj.mat)
grph.top300.anoxic <- adj2igraph(weighted.adj.mat)
# Node names come from the row names of the stacked OTU table
V(grph.top300.anoxic)$name <- rownames(top300_anoxic_df)
# Node size is (degree + 1) / 2: the +1 avoids size-zero vertices and the
# division scales the nodes down.
V(grph.top300.anoxic)$size <- (degree(grph.top300.anoxic) + 1) / 2
# Edge width is ten times the absolute edge weight
E(grph.top300.anoxic)$width <- abs(E(grph.top300.anoxic)$weight) * 10
# Join taxonomy data to each node
# Convert the graph to a list of vertex and edge data frames
grph.top300_anoxic_df <- igraph::as_data_frame(grph.top300.anoxic, what = "both")
# Formatted taxonomy table for each domain; the row names are copied into a
# `name` column so the tables can be joined to the vertex table.
bac_top100_tax_table <- as.data.frame(tax_table(ps_bac_pruned_3domains_top100_anoxic)) %>%
  mutate(name = rownames(tax_table(ps_bac_pruned_3domains_top100_anoxic)))
arch_top100_tax_table <- as.data.frame(tax_table(ps_arch_pruned_3domains_top100_anoxic)) %>%
  mutate(name = rownames(tax_table(ps_arch_pruned_3domains_top100_anoxic)))
euk_top100_tax_table <- as.data.frame(tax_table(ps_euk_pruned_3domains_top100_anoxic)) %>%
  mutate(name = rownames(tax_table(ps_euk_pruned_3domains_top100_anoxic)))
# Vertex rows are assumed to be ordered bacteria, archaea, eukaryotes (the
# order in which the OTU tables were stacked). Each domain's rows therefore
# start after the cumulative count of the preceding domains. The previous
# offsets reused the domain's own count, which is only correct when all three
# domains contribute the same number of taxa.
n_bac <- ntaxa(ps_bac_pruned_3domains_top100_anoxic)
n_arch <- ntaxa(ps_arch_pruned_3domains_top100_anoxic)
n_euk <- ntaxa(ps_euk_pruned_3domains_top100_anoxic)
vertex_df <- grph.top300_anoxic_df$vertices
# Every vertex must belong to exactly one domain slice
stopifnot(nrow(vertex_df) == n_bac + n_arch + n_euk)
# Link the vertex rows of each domain to its taxonomy table
bac_temp <- left_join(vertex_df[seq_len(n_bac), ], bac_top100_tax_table, by = "name")
# Delete columns that don't match the other tax tables
bac_temp <- select(bac_temp, -"taxonomy-9", -"Refined taxonomy")
arch_temp <- left_join(vertex_df[n_bac + seq_len(n_arch), ], arch_top100_tax_table, by = "name")
euk_temp <- left_join(vertex_df[n_bac + n_arch + seq_len(n_euk), ], euk_top100_tax_table, by = "name")
# Rename the column names in the euk table to match the others
euk_temp <- euk_temp %>%
  rename("taxonomy-1" = Kingdom, "taxonomy-2" = Supergroup, "taxonomy-3" = Division, "taxonomy-4" = Class, "taxonomy-5" = Order, "taxonomy-6" = Family, "taxonomy-7" = Genus, "taxonomy-8" = Species)
# Build the full vertex data frame with all 3 domains
all_temp <- rbind(bac_temp, arch_temp, euk_temp)
# Remake into a graph whose vertices carry the taxonomy columns
grph.top300.anoxic <- graph_from_data_frame(grph.top300_anoxic_df$edges,
                                            directed = FALSE,
                                            vertices = all_temp)
# Colour palette for the domains
dtype <- c("red", "green", "blue", "grey")
# One colour per node, picked by the factor code of its "taxonomy-1" value
domain_color_top300_anoxic <- dtype[
  as.numeric(as.factor(V(grph.top300.anoxic)$`taxonomy-1`))
]
# Draws the anoxic network with its title and legend on the active device
draw_anoxic_network <- function() {
  plot(
    grph.top300.anoxic,
    vertex.label = NA,
    layout = layout_with_graphopt(grph.top300.anoxic),
    vertex.color = domain_color_top300_anoxic
  )
  title("SpiecEasi Network: All domains, anoxic")
  legend(
    "topright",
    bty = "n",
    legend = c("Archaea", "Bacteria", "Eukarya", "No Blast Hit"),
    fill = c("red", "green", "blue", "grey"),
    border = NA
  )
}
# Plot
draw_anoxic_network()
# Save the same plot as EPS
setEPS()
postscript(file = "Figures/3domains_top300_anoxic_spieceasi_network.eps", width = 5.5, height = 5)
draw_anoxic_network()
dev.off()
quartz_off_screen
2
# Subset based on pos or neg edges: the positive graph drops every edge with
# weight < 0, the negative graph drops every edge with weight > 0.
grph.top300.anoxic.pos <- delete_edges(grph.top300.anoxic, which(E(grph.top300.anoxic)$weight < 0))
grph.top300.anoxic.neg <- delete_edges(grph.top300.anoxic, which(E(grph.top300.anoxic)$weight > 0))
# For each subsetted graph, remove the nodes that are no longer connected to anything
grph.top300.anoxic.pos <- delete_vertices(grph.top300.anoxic.pos, which(degree(grph.top300.anoxic.pos) == 0))
grph.top300.anoxic.neg <- delete_vertices(grph.top300.anoxic.neg, which(degree(grph.top300.anoxic.neg) == 0))
# Colour vector for each subgraph. The factor levels are taken from the full
# network so that a given "taxonomy-1" value keeps the same colour (and stays
# in step with the legend) even if one of the values is absent from a subgraph.
domain_levels <- levels(as.factor(V(grph.top300.anoxic)$"taxonomy-1"))
domain_color_top300_anoxic_pos <- dtype[as.numeric(factor(V(grph.top300.anoxic.pos)$"taxonomy-1", levels = domain_levels))]
domain_color_top300_anoxic_neg <- dtype[as.numeric(factor(V(grph.top300.anoxic.neg)$"taxonomy-1", levels = domain_levels))]
# Draws one subnetwork with the given node colours, title and the domain legend
draw_anoxic_subnetwork <- function(grph, node_colors, main) {
  plot(grph,
       vertex.label = NA,
       layout = layout_with_graphopt(grph),
       vertex.color = node_colors)
  title(main)
  legend("topright", bty = "n",
         legend = c("Archaea", "Bacteria", "Eukarya", "No Blast Hit"),
         fill = c("red", "green", "blue", "grey"), border = NA)
}
# Plot pos
draw_anoxic_subnetwork(grph.top300.anoxic.pos, domain_color_top300_anoxic_pos,
                       "SpiecEasi Network: All domains, Positive Edges only, anoxic")
# Plot neg
draw_anoxic_subnetwork(grph.top300.anoxic.neg, domain_color_top300_anoxic_neg,
                       "SpiecEasi Network: All domains, Negative Edges Only, anoxic")
# Save the positive-edge plot as EPS
setEPS()
postscript(file = "Figures/3domains_top300_posedges_anoxic_spieceasi_network.eps", width = 5.5, height = 5)
draw_anoxic_subnetwork(grph.top300.anoxic.pos, domain_color_top300_anoxic_pos,
                       "SpiecEasi Network: All domains, Positive Edges Only, anoxic")
dev.off()
quartz_off_screen
2
# Write the negative-edge anoxic network to an EPS file
setEPS()
postscript(
  file = "Figures/3domains_top300_negedges_anoxic_spieceasi_network.eps",
  width = 5.5,
  height = 5
)
plot(
  grph.top300.anoxic.neg,
  layout = layout_with_graphopt(grph.top300.anoxic.neg),
  vertex.color = domain_color_top300_anoxic_neg,
  vertex.label = NA
)
title(main = "SpiecEasi Network: All domains, Negative Edges Only, anoxic")
legend(
  x = "topright",
  legend = c("Archaea", "Bacteria", "Eukarya", "No Blast Hit"),
  fill = c("red", "green", "blue", "grey"),
  border = NA,
  bty = "n"
)
dev.off()
quartz_off_screen
2
First, extract the ASV names from the original phyloseq objects so they can be retained as network node names. Make sure they are in the same order in which they went into SpiecEasi.
# Drop the anoxic SpiecEasi object from the workspace, then load the next
# saved workspace.
rm(se_3domains_top100_anoxic)
load("EnvironmentBackups/CariacoEuks_revisions2_additionalvars_5.RData")
# Stack the OTU tables of bacteria, archaea and non-parasite eukaryotes, in
# that order.
top300_df_nonparasites <- bind_rows(
  lapply(
    list(ps_bac_top100, ps_arch_top100, ps_euk_nonparasites_top100),
    function(ps) data.frame(otu_table(ps))
  )
)
top300_df_nonparasites
# Refit adjacency matrix from the SpiecEasi output.
# NOTE(review): in the recorded run this call fails because
# se_3domains_top100_nonparasites is not in the workspace -- confirm which
# saved workspace provides it.
adj.mat <- getRefit(se_3domains_top100_nonparasites)
Error in getOptX(est, "refit") :
object 'se_3domains_top100_nonparasites' not found
# Subset based on pos or neg edges: the positive graph drops every edge with
# weight < 0, the negative graph drops every edge with weight > 0.
grph.top300.nonparasites.pos <- delete_edges(grph.top300.nonparasites, which(E(grph.top300.nonparasites)$weight < 0))
grph.top300.nonparasites.neg <- delete_edges(grph.top300.nonparasites, which(E(grph.top300.nonparasites)$weight > 0))
# For each subsetted graph, remove the nodes that are no longer connected to anything
grph.top300.nonparasites.pos <- delete_vertices(grph.top300.nonparasites.pos, which(degree(grph.top300.nonparasites.pos) == 0))
grph.top300.nonparasites.neg <- delete_vertices(grph.top300.nonparasites.neg, which(degree(grph.top300.nonparasites.neg) == 0))
# Colour vector for each subgraph. The factor levels are taken from the full
# network so that a given "taxonomy-1" value keeps the same colour (and stays
# in step with the legend) even if one of the values is absent from a subgraph.
domain_levels <- levels(as.factor(V(grph.top300.nonparasites)$"taxonomy-1"))
domain_color_top300_nonparasites_pos <- dtype[as.numeric(factor(V(grph.top300.nonparasites.pos)$"taxonomy-1", levels = domain_levels))]
domain_color_top300_nonparasites_neg <- dtype[as.numeric(factor(V(grph.top300.nonparasites.neg)$"taxonomy-1", levels = domain_levels))]
# Draws one subnetwork with the given node colours, title and the domain legend
draw_nonparasites_subnetwork <- function(grph, node_colors, main) {
  plot(grph,
       vertex.label = NA,
       layout = layout_with_graphopt(grph),
       vertex.color = node_colors)
  title(main)
  legend("topright", bty = "n",
         legend = c("Archaea", "Bacteria", "Eukarya", "No Blast Hit"),
         fill = c("red", "green", "blue", "grey"), border = NA)
}
# Titles name the network actually drawn. The earlier positive-edge titles
# omitted "No Parasites" on screen and carried an "Oxycline" suffix in the
# saved figure, unlike the negative-edge titles for the same network.
# Plot pos
draw_nonparasites_subnetwork(grph.top300.nonparasites.pos, domain_color_top300_nonparasites_pos,
                             "SpiecEasi Network: All domains, No Parasites, Positive Edges Only")
# Plot neg
draw_nonparasites_subnetwork(grph.top300.nonparasites.neg, domain_color_top300_nonparasites_neg,
                             "SpiecEasi Network: All domains, No Parasites, Negative Edges Only")
# Save the positive-edge plot as EPS
setEPS()
postscript(file = "Figures/3domains_nonparasites_top300_posedges_spieceasi_network.eps", width = 5.5, height = 5)
draw_nonparasites_subnetwork(grph.top300.nonparasites.pos, domain_color_top300_nonparasites_pos,
                             "SpiecEasi Network: All domains, No Parasites, Positive Edges Only")
dev.off()
quartz_off_screen
2
# Write the negative-edge non-parasite network to an EPS file
setEPS()
postscript(
  file = "Figures/3domains_nonparasites_top300_negedges_spieceasi_network.eps",
  width = 5.5,
  height = 5
)
plot(
  grph.top300.nonparasites.neg,
  layout = layout_with_graphopt(grph.top300.nonparasites.neg),
  vertex.color = domain_color_top300_nonparasites_neg,
  vertex.label = NA
)
title(main = "SpiecEasi Network: All domains, No Parasites, Negative Edges Only")
legend(
  x = "topright",
  legend = c("Archaea", "Bacteria", "Eukarya", "No Blast Hit"),
  fill = c("red", "green", "blue", "grey"),
  border = NA,
  bty = "n"
)
dev.off()
quartz_off_screen
2
First, extract the ASV names from the original phyloseq objects so they can be retained as network node names. Make sure they are in the same order in which they went into SpiecEasi.
# Extract the refit adjacency matrix from the SpiecEasi output
adj.mat <- getRefit(se_3domains_top100_parasites)
# Tally the 0 and 1 entries of the adjacency matrix
table(as.numeric(adj.mat))
0 1
84640 5360
# Weighted adjacency: the optimal covariance matrix is converted to
# correlations and multiplied element-wise by the refit adjacency matrix.
se.cor <- se_3domains_top100_parasites %>%
  getOptCov() %>%
  as.matrix() %>%
  cov2cor()
weighted.adj.mat <- se.cor * getRefit(se_3domains_top100_parasites)
# Graph objects: one from the 0/1 adjacency, one from the weighted adjacency
grph.unweighted.top300.parasites <- adj2igraph(adj.mat)
grph.top300.parasites <- adj2igraph(weighted.adj.mat)
# Node names come from the row names of top300_df_parasites.
# NOTE(review): top300_df_parasites is not created in this part of the file;
# presumably it comes from a loaded workspace -- verify.
V(grph.top300.parasites)$name <- rownames(top300_df_parasites)
# Node size is (degree + 1) / 2: the +1 avoids size-zero vertices and the
# division scales the nodes down.
V(grph.top300.parasites)$size <- (degree(grph.top300.parasites) + 1) / 2
# Edge width is ten times the absolute edge weight
E(grph.top300.parasites)$width <- abs(E(grph.top300.parasites)$weight) * 10
# Join taxonomy data to each node
# Convert the graph to a list of vertex and edge data frames
grph.top300.parasites_df <- igraph::as_data_frame(grph.top300.parasites, what = "both")
# Formatted taxonomy table for each domain; the row names are copied into a
# `name` column so the tables can be joined to the vertex table.
bac_top100_tax_table <- as.data.frame(tax_table(ps_bac_pruned_3domains_top100)) %>%
  mutate(name = rownames(tax_table(ps_bac_pruned_3domains_top100)))
arch_top100_tax_table <- as.data.frame(tax_table(ps_arch_pruned_3domains_top100)) %>%
  mutate(name = rownames(tax_table(ps_arch_pruned_3domains_top100)))
euk_top100_tax_table <- as.data.frame(tax_table(ps_euk_parasites_pruned_3domains_top100)) %>%
  mutate(name = rownames(tax_table(ps_euk_parasites_pruned_3domains_top100)))
# Vertex rows are assumed to be ordered bacteria, archaea, eukaryotes. Each
# domain's rows therefore start after the cumulative count of the preceding
# domains. The previous offsets reused the domain's own count, which is only
# correct when all three domains contribute the same number of taxa.
n_bac <- ntaxa(ps_bac_pruned_3domains_top100)
n_arch <- ntaxa(ps_arch_pruned_3domains_top100)
n_euk <- ntaxa(ps_euk_parasites_pruned_3domains_top100)
vertex_df <- grph.top300.parasites_df$vertices
# Every vertex must belong to exactly one domain slice
stopifnot(nrow(vertex_df) == n_bac + n_arch + n_euk)
# Link the vertex rows of each domain to its taxonomy table
bac_temp <- left_join(vertex_df[seq_len(n_bac), ], bac_top100_tax_table, by = "name")
# Delete columns that don't match the other tax tables
bac_temp <- select(bac_temp, -"taxonomy-9", -"Refined taxonomy")
arch_temp <- left_join(vertex_df[n_bac + seq_len(n_arch), ], arch_top100_tax_table, by = "name")
euk_temp <- left_join(vertex_df[n_bac + n_arch + seq_len(n_euk), ], euk_top100_tax_table, by = "name")
# Rename the column names in the euk table to match the others
euk_temp <- euk_temp %>%
  rename("taxonomy-1" = Kingdom, "taxonomy-2" = Supergroup, "taxonomy-3" = Division, "taxonomy-4" = Class, "taxonomy-5" = Order, "taxonomy-6" = Family, "taxonomy-7" = Genus, "taxonomy-8" = Species)
# Build the full vertex data frame with all 3 domains
all_temp <- rbind(bac_temp, arch_temp, euk_temp)
# Remake into a graph whose vertices carry the taxonomy columns
grph.top300.parasites <- graph_from_data_frame(grph.top300.parasites_df$edges,
                                               directed = FALSE,
                                               vertices = all_temp)
# Colour palette for the domains
dtype <- c("red", "green", "blue", "grey")
# One colour per node, picked by the factor code of its "taxonomy-1" value
domain_color_top300_parasites <- dtype[
  as.numeric(as.factor(V(grph.top300.parasites)$`taxonomy-1`))
]
# Draws the parasite-only network with its title and legend on the active device
draw_parasites_network <- function() {
  plot(
    grph.top300.parasites,
    vertex.label = NA,
    layout = layout_with_graphopt(grph.top300.parasites),
    vertex.color = domain_color_top300_parasites
  )
  title("SpiecEasi Network: All domains, Parasitic Eukaryotes Only")
  legend(
    "topright",
    bty = "n",
    legend = c("Archaea", "Bacteria", "Eukarya", "No blast hit"),
    fill = c("red", "green", "blue", "grey"),
    border = NA
  )
}
# Plot
draw_parasites_network()
# Save the same plot as EPS
setEPS()
postscript(file = "Figures/3domains_parasites_top300_spieceasi_network.eps", width = 5.5, height = 5)
draw_parasites_network()
dev.off()
quartz_off_screen
2
# Subset based on pos or neg edges: the positive graph drops every edge with
# weight < 0, the negative graph drops every edge with weight > 0.
grph.top300.parasites.pos <- delete_edges(grph.top300.parasites, which(E(grph.top300.parasites)$weight < 0))
grph.top300.parasites.neg <- delete_edges(grph.top300.parasites, which(E(grph.top300.parasites)$weight > 0))
# For each subsetted graph, remove the nodes that are no longer connected to anything
grph.top300.parasites.pos <- delete_vertices(grph.top300.parasites.pos, which(degree(grph.top300.parasites.pos) == 0))
grph.top300.parasites.neg <- delete_vertices(grph.top300.parasites.neg, which(degree(grph.top300.parasites.neg) == 0))
# Colour vector for each subgraph. The factor levels are taken from the full
# network so that a given "taxonomy-1" value keeps the same colour (and stays
# in step with the legend) even if one of the values is absent from a subgraph.
domain_levels <- levels(as.factor(V(grph.top300.parasites)$"taxonomy-1"))
domain_color_top300_parasites_pos <- dtype[as.numeric(factor(V(grph.top300.parasites.pos)$"taxonomy-1", levels = domain_levels))]
domain_color_top300_parasites_neg <- dtype[as.numeric(factor(V(grph.top300.parasites.neg)$"taxonomy-1", levels = domain_levels))]
# Draws one subnetwork with the given node colours, title and the domain legend
draw_parasites_subnetwork <- function(grph, node_colors, main) {
  plot(grph,
       vertex.label = NA,
       layout = layout_with_graphopt(grph),
       vertex.color = node_colors)
  title(main)
  legend("topright", bty = "n",
         legend = c("Archaea", "Bacteria", "Eukarya", "No Blast Hit"),
         fill = c("red", "green", "blue", "grey"), border = NA)
}
# Titles name the network actually drawn. The earlier titles in this section
# said "No Parasites" (and "Oxycline" in the saved figure) although the graphs
# plotted here are the parasite-only ones.
# Plot pos
draw_parasites_subnetwork(grph.top300.parasites.pos, domain_color_top300_parasites_pos,
                          "SpiecEasi Network: All domains, Parasitic Eukaryotes Only, Positive Edges Only")
# Plot neg
draw_parasites_subnetwork(grph.top300.parasites.neg, domain_color_top300_parasites_neg,
                          "SpiecEasi Network: All domains, Parasitic Eukaryotes Only, Negative Edges Only")
# Save the positive-edge plot as EPS
setEPS()
postscript(file = "Figures/3domains_parasites_top300_posedges_spieceasi_network.eps", width = 5.5, height = 5)
draw_parasites_subnetwork(grph.top300.parasites.pos, domain_color_top300_parasites_pos,
                          "SpiecEasi Network: All domains, Parasitic Eukaryotes Only, Positive Edges Only")
dev.off()
quartz_off_screen
2
# Write the negative-edge parasite-only network to an EPS file
setEPS()
postscript(file = "Figures/3domains_parasites_top300_negedges_spieceasi_network.eps", width = 5.5, height = 5)
plot(grph.top300.parasites.neg,
     vertex.label = NA,
     layout = layout_with_graphopt(grph.top300.parasites.neg),
     vertex.color = domain_color_top300_parasites_neg)
# The title names the parasite-only network; it previously read "No Parasites",
# which describes the other network.
title("SpiecEasi Network: All domains, Parasitic Eukaryotes Only, Negative Edges Only")
legend("topright", bty = "n",
       legend = c("Archaea", "Bacteria", "Eukarya", "No Blast Hit"),
       fill = c("red", "green", "blue", "grey"), border = NA)
dev.off()
quartz_off_screen
2
Or load if coming back
# Load the saved workspace instead of recomputing the networks
load(file = "EnvironmentBackups/CariacoEuks_postanalysis_vars_upto_Oct2021_newNetworks.RData")
Using the new, size-controlled network plots
First- All depths, oxycline, anoxic networks:
# Set up a 3 x 2 panel layout: rows are all depths / oxycline / anoxic,
# columns are positive / negative networks.
op <- par(oma = c(2, .5, .5, 0), # Room for the titles and legend
          mfrow = c(3, 2),
          mai = c(.15, .3, .15, .1))
# One entry per panel, in drawing order. `top` is the column heading written
# in the outer margin and `side` is the row label. Panel 2 uses
# grph.top300.neg, the same graph as the saved version of this figure; it
# previously drew the 2-domain negative network with the all-depths colours.
network_panels <- list(
  # Panel 1- All depths, positive network
  list(grph = grph.top300.pos, cols = domain_color_top300_pos,
       top = "Positive", top_adj = 0.22, side = "All depths"),
  # Panel 2- All depths, negative network
  list(grph = grph.top300.neg, cols = domain_color_top300_neg,
       top = "Negative", top_adj = .8),
  # Panel 3- Oxycline, positive network
  list(grph = grph.top300.oxycline.pos, cols = domain_color_top300_oxycline_pos,
       side = "Oxycline"),
  # Panel 4- Oxycline, negative network
  list(grph = grph.top300.oxycline.neg, cols = domain_color_top300_oxycline_neg),
  # Panel 5- Anoxic, positive network
  list(grph = grph.top300.anoxic.pos, cols = domain_color_top300_anoxic_pos,
       side = "Anoxic"),
  # Panel 6- Anoxic, negative network
  list(grph = grph.top300.anoxic.neg, cols = domain_color_top300_anoxic_neg)
)
for (panel in network_panels) {
  plot(panel[["grph"]],
       vertex.label = NA,
       layout = layout_with_graphopt(panel[["grph"]]),
       vertex.color = panel[["cols"]])
  if (!is.null(panel[["top"]])) {
    mtext(panel[["top"]], side = 3, outer = TRUE, line = -1, adj = panel[["top_adj"]], cex = .8)
  }
  if (!is.null(panel[["side"]])) {
    mtext(panel[["side"]], side = 2, cex = .8, line = 1.5)
  }
}
# Add legend
par(op) # Leave the last plot
op <- par(usr = c(0, 1, 0, 1), # Reset the coordinates
          xpd = NA) # Allow plotting outside the plot region
legend(.15, -0.04, c("Archaea", "Bacteria", "Eukarya", "No Blast Hit"), col = c("red", "green", "blue", "grey"), pch = c(16), box.col = NA, cex = .8, horiz = TRUE, x.intersp = c(0.3))
# Save figure: open an EPS device and redraw the six-panel figure into it
setEPS(width = 6, height = 7)
postscript("Figures/Networks_pos_neg_all_oxycline_anox_Nov21.eps")
# 3 x 2 panel layout
op <- par(
  oma = c(2, .5, .5, 0), # Room for the titles and legend
  mfrow = c(3, 2),
  mai = c(.15, .3, .15, .1)
)
# Draws one unlabelled network panel with the given node colours
draw_depth_panel <- function(grph, node_colors) {
  plot(grph,
       vertex.label = NA,
       layout = layout_with_graphopt(grph),
       vertex.color = node_colors)
}
# Row 1- All depths: positive, then negative
draw_depth_panel(grph.top300.pos, domain_color_top300_pos)
mtext("Positive", side = 3, outer = TRUE, line = -1, adj = 0.22, cex = .8)
mtext("All depths", side = 2, cex = .8, line = 1.5)
draw_depth_panel(grph.top300.neg, domain_color_top300_neg)
mtext("Negative", side = 3, outer = TRUE, line = -1, adj = .8, cex = .8)
# Row 2- Oxycline: positive, then negative
draw_depth_panel(grph.top300.oxycline.pos, domain_color_top300_oxycline_pos)
mtext("Oxycline", side = 2, cex = .8, line = 1.5)
draw_depth_panel(grph.top300.oxycline.neg, domain_color_top300_oxycline_neg)
# Row 3- Anoxic: positive, then negative
draw_depth_panel(grph.top300.anoxic.pos, domain_color_top300_anoxic_pos)
mtext("Anoxic", side = 2, cex = .8, line = 1.5)
draw_depth_panel(grph.top300.anoxic.neg, domain_color_top300_anoxic_neg)
# Add legend
par(op) # Leave the last plot
op <- par(
  usr = c(0, 1, 0, 1), # Reset the coordinates
  xpd = NA # Allow plotting outside the plot region
)
legend(.15, -0.04, c("Archaea", "Bacteria", "Eukarya", "No Blast Hit"), col = c("red", "green", "blue", "grey"), pch = c(16), box.col = NA, cex = .8, horiz = TRUE, x.intersp = c(0.3))
dev.off()
quartz_off_screen
2
Second- No euks, no parasites, no nonparasites
# Set up a 3 x 2 panel layout: rows are eukaryotes removed / parasites removed
# / nonparasites removed, columns are positive / negative networks.
op <- par(oma = c(2, .5, .5, 0), # Room for the titles and legend
          mfrow = c(3, 2),
          mai = c(.15, .3, .15, .1))
# One entry per panel, in drawing order. `top` is the column heading written
# in the outer margin and `side` is the row label.
network_panels <- list(
  # Panel 1- Euks removed, positive network
  list(grph = grph.top300.2domains.pos, cols = domain_color_2domains_top300_pos,
       top = "Positive", top_adj = 0.22, side = "Eukaryotes removed"),
  # Panel 2- Euks removed, negative network
  list(grph = grph.top300.2domains.neg, cols = domain_color_2domains_top300_neg,
       top = "Negative", top_adj = .8),
  # Panel 3- Parasites removed, positive network
  list(grph = grph.top300.nonparasites.pos, cols = domain_color_top300_nonparasites_pos,
       side = "Parasites Removed"),
  # Panel 4- Parasites removed, negative network
  list(grph = grph.top300.nonparasites.neg, cols = domain_color_top300_nonparasites_neg),
  # Panel 5- Nonparasites removed, positive network
  list(grph = grph.top300.parasites.pos, cols = domain_color_top300_parasites_pos,
       side = "Nonparasites Removed"),
  # Panel 6- Nonparasites removed, negative network
  list(grph = grph.top300.parasites.neg, cols = domain_color_top300_parasites_neg)
)
for (panel in network_panels) {
  plot(panel[["grph"]],
       vertex.label = NA,
       layout = layout_with_graphopt(panel[["grph"]]),
       vertex.color = panel[["cols"]])
  if (!is.null(panel[["top"]])) {
    mtext(panel[["top"]], side = 3, outer = TRUE, line = -1, adj = panel[["top_adj"]], cex = .8)
  }
  if (!is.null(panel[["side"]])) {
    mtext(panel[["side"]], side = 2, cex = .8, line = 1.5)
  }
}
# Add legend
par(op) # Leave the last plot
op <- par(usr = c(0, 1, 0, 1), # Reset the coordinates
          xpd = NA) # Allow plotting outside the plot region
legend(.15, -0.04, c("Archaea", "Bacteria", "Eukarya", "No Blast Hit"), col = c("red", "green", "blue", "grey"), pch = c(16), box.col = NA, cex = .8, horiz = TRUE, x.intersp = c(0.3))
# Save figure: open an EPS device and redraw the six-panel figure into it
setEPS(width = 6, height = 7)
postscript("Figures/Networks_pos_neg_noeuks_noparasites_nononparasites_Nov21.eps")
# 3 x 2 panel layout
op <- par(
  oma = c(2, .5, .5, 0), # Room for the titles and legend
  mfrow = c(3, 2),
  mai = c(.15, .3, .15, .1)
)
# Draws one unlabelled network panel with the given node colours
draw_euk_panel <- function(grph, node_colors) {
  plot(grph,
       vertex.label = NA,
       layout = layout_with_graphopt(grph),
       vertex.color = node_colors)
}
# Row 1- Euks removed: positive, then negative
draw_euk_panel(grph.top300.2domains.pos, domain_color_2domains_top300_pos)
mtext("Positive", side = 3, outer = TRUE, line = -1, adj = 0.22, cex = .8)
mtext("Eukaryotes removed", side = 2, cex = .8, line = 1.5)
draw_euk_panel(grph.top300.2domains.neg, domain_color_2domains_top300_neg)
mtext("Negative", side = 3, outer = TRUE, line = -1, adj = .8, cex = .8)
# Row 2- Parasites removed: positive, then negative
draw_euk_panel(grph.top300.nonparasites.pos, domain_color_top300_nonparasites_pos)
mtext("Parasites Removed", side = 2, cex = .8, line = 1.5)
draw_euk_panel(grph.top300.nonparasites.neg, domain_color_top300_nonparasites_neg)
# Row 3- Nonparasites removed: positive, then negative
draw_euk_panel(grph.top300.parasites.pos, domain_color_top300_parasites_pos)
mtext("Nonparasites Removed", side = 2, cex = .8, line = 1.5)
draw_euk_panel(grph.top300.parasites.neg, domain_color_top300_parasites_neg)
# Add legend
par(op) # Leave the last plot
op <- par(
  usr = c(0, 1, 0, 1), # Reset the coordinates
  xpd = NA # Allow plotting outside the plot region
)
legend(.15, -0.04, c("Archaea", "Bacteria", "Eukarya", "No Blast Hit"), col = c("red", "green", "blue", "grey"), pch = c(16), box.col = NA, cex = .8, horiz = TRUE, x.intersp = c(0.3))
dev.off()
quartz_off_screen
2
From newly made graphs that control for network size
# Edge counts and the percentage of negative-weight edges for each network.
# The count is the length of the edge "weight" attribute; the percentage is
# the number of weights below zero divided by that count, times 100.
# total number of edges in the full-dataset network of the top 300 OTUs/ASVs
There were 25 warnings (use warnings() to see them)
length(E(grph.top300)$weight)
[1] 2468
# percentage of edges with a negative weight
(sum(E(grph.top300)$weight<0)/length(E(grph.top300)$weight))*100
[1] 33.58995
# total number of edges in the 2-domain dataset network
length(E(grph.2domains.top300)$weight)
[1] 3450
# percentage of edges with a negative weight
(sum(E(grph.2domains.top300)$weight<0)/length(E(grph.2domains.top300)$weight))*100
[1] 30.84058
# total number of edges in the oxycline network
length(E(grph.top300.oxycline)$weight)
[1] 1535
# percentage of edges with a negative weight
(sum(E(grph.top300.oxycline)$weight<0)/length(E(grph.top300.oxycline)$weight))*100
[1] 39.15309
# total number of edges in the anoxic network
length(E(grph.top300.anoxic)$weight)
[1] 767
# percentage of edges with a negative weight
(sum(E(grph.top300.anoxic)$weight<0)/length(E(grph.top300.anoxic)$weight))*100
[1] 32.85528
# total number of edges in the network with only nonparasite euks
length(E(grph.top300.nonparasites)$weight)
[1] 2824
# percentage of edges with a negative weight
(sum(E(grph.top300.nonparasites)$weight<0)/length(E(grph.top300.nonparasites)$weight))*100
[1] 35.48159
# total number of edges in the network with only parasite euks
length(E(grph.top300.parasites)$weight)
[1] 2680
# percentage of edges with a negative weight
(sum(E(grph.top300.parasites)$weight<0)/length(E(grph.top300.parasites)$weight))*100
[1] 37.83582
Edge density: the number of edges relative to the total number of possible edges, expressed as a percentage
# Edge density of each network, multiplied by 100 to give a percentage
# full dataset
edge_density(grph.top300)*100
[1] 5.502787
# 2 domains
edge_density(grph.2domains.top300)*100
[1] 7.692308
# oxycline
edge_density(grph.top300.oxycline)*100
[1] 3.42252
# anoxic
edge_density(grph.top300.anoxic)*100
[1] 1.710145
# nonparasites
edge_density(grph.top300.nonparasites)*100
[1] 6.296544
# parasites
edge_density(grph.top300.parasites)*100
[1] 5.975474
The size of the components, or “clumps,” in the network, and how many members in each
# For each network: $no is the number of components and $csize is the number
# of nodes in each component.
# full dataset
components(grph.top300)$no
[1] 38
components(grph.top300)$csize
[1] 233 9 5 1 1 1 1 9 3 1 1 1 1 1 3 1 1
[18] 4 1 1 1 2 1 1 1 1 1 1 3 1 1 1 1 1
[35] 1 1 1 1
# 2 domains
components(grph.2domains.top300)$no
[1] 21
components(grph.2domains.top300)$csize
[1] 276 1 1 1 1 1 1 1 2 1 1 1 1 1 3 2 1
[18] 1 1 1 1
# oxycline
components(grph.top300.oxycline)$no
[1] 23
components(grph.top300.oxycline)$csize
[1] 274 1 1 1 1 1 1 1 1 1 1 2 2 1 1 1 1
[18] 1 1 3 1 1 1
# anoxic
components(grph.top300.anoxic)$no
[1] 113
components(grph.top300.anoxic)$csize
[1] 140 3 1 1 2 10 1 1 1 3 1 1 1 1 1 1
[17] 1 1 1 1 1 1 1 1 1 2 1 1 1 11 1 2
[33] 5 1 1 2 1 2 1 3 1 2 1 1 1 1 1 1
[49] 1 2 1 1 1 1 1 3 1 1 1 1 1 1 1 1
[65] 1 1 1 1 1 6 1 1 1 1 1 2 3 1 1 1
[81] 1 1 2 1 1 2 1 1 1 1 1 1 1 1 1 1
[97] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
[113] 1
# nonparasites
components(grph.top300.nonparasites)$no
[1] 41
components(grph.top300.nonparasites)$csize
[1] 222 9 5 1 1 10 1 1 1 3 1 1 1 5 1 1 1
[18] 1 5 3 1 1 1 1 1 2 4 1 1 1 1 1 1 1
[35] 1 1 1 2 1 1 1
# parasites
components(grph.top300.parasites)$no
[1] 40
components(grph.top300.parasites)$csize
[1] 257 4 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1
[18] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
[35] 1 1 1 1 1 1
The anoxic network is the most disjointed, with 113 clumps and the largest containing only 140 members. The full dataset is next, with 38 clumps and the largest containing 233 members. The oxycline network has only 23 clumps, and its largest clump contains 274 members.
A path is the shortest distance between two nodes (the fewest number of edges). The average path length of a network gives a sense of how connected every node is to every other node. Unconnected hubs in the network will have “infinite” paths from other hubs. The function mean_distance ignores the infinite paths and calculates the average of all other paths.
# average path length of each network
# full dataset
mean_distance(grph.top300)
[1] 3.137986
# 2 domains
mean_distance(grph.2domains.top300)
[1] 3.254222
# oxycline
mean_distance(grph.top300.oxycline)
[1] 4.148024
# anoxic
mean_distance(grph.top300.anoxic)
[1] 5.327126
# nonparasites
mean_distance(grph.top300.nonparasites)
[1] 2.846042
# parasites
mean_distance(grph.top300.parasites)
[1] 4.645595
# Each section below converts one signed network to data frames (edges and
# vertices), keeps the vertex table, and tallies the vertices per taxonomy-5
# group to read off the Syndiniales and Polycystinea counts. The counts noted
# under each tally were recorded by hand from the printed table.
# grph.pos_df / grph.pos_df_vert and grph.neg_df / grph.neg_df_vert are
# overwritten by every section, so they end up holding the anoxic networks.

# Full dataset, positive network
grph.pos_df <- igraph::as_data_frame(grph.top300.pos, what = "both")
grph.pos_df_vert <- grph.pos_df[["vertices"]]
as.data.frame(table(grph.pos_df_vert[["taxonomy-5"]]))
# 25 Dino-Group-II
# 1 Dino-Group-I
# 28 Spumellarida

# Full dataset, negative network
grph.neg_df <- igraph::as_data_frame(grph.top300.neg, what = "both")
grph.neg_df_vert <- grph.neg_df[["vertices"]]
as.data.frame(table(grph.neg_df_vert[["taxonomy-5"]]))
# 8 Dino-Group-II
# 1 Dino-Group-I
# 7 Spumellarida

# Oxycline, positive network
grph.pos_df <- igraph::as_data_frame(grph.top300.oxycline.pos, what = "both")
grph.pos_df_vert <- grph.pos_df[["vertices"]]
as.data.frame(table(grph.pos_df_vert[["taxonomy-5"]]))
# 46 Dino-Group-II
# 5 Dino-Group-I
# 30 Spumellarida

# Oxycline, negative network
grph.neg_df <- igraph::as_data_frame(grph.top300.oxycline.neg, what = "both")
grph.neg_df_vert <- grph.neg_df[["vertices"]]
as.data.frame(table(grph.neg_df_vert[["taxonomy-5"]]))
# 37 Dino-Group-II
# 4 Dino-Group-I
# 17 Spumellarida

# Anoxic, positive network
grph.pos_df <- igraph::as_data_frame(grph.top300.anoxic.pos, what = "both")
grph.pos_df_vert <- grph.pos_df[["vertices"]]
as.data.frame(table(grph.pos_df_vert[["taxonomy-5"]]))
# 7 Dino-Group-II
# 0 Dino-Group-I
# 15 Spumellarida

# Anoxic, negative network
grph.neg_df <- igraph::as_data_frame(grph.top300.anoxic.neg, what = "both")
grph.neg_df_vert <- grph.neg_df[["vertices"]]
as.data.frame(table(grph.neg_df_vert[["taxonomy-5"]]))
# 1 Dino-Group-II
# 0 Dino-Group-I
# 2 Spumellarida
# For a focal taxon: pick its vertices from the vertex table, select the edges
# of the signed graph attached to those vertices, build the subgraph made of
# those edges, and count the taxonomy of every non-focal vertex in it.
# NOTE(review): grph.neg_df_vert / grph.pos_df_vert hold whatever vertex table
# was assigned last before this block, while grph.neg / grph.pos are defined
# elsewhere -- confirm that the tables and the graphs describe the same network.

# Negative associations: Syndiniales
grph.neg_df_vert_synd <- grph.neg_df_vert %>%
  filter(`taxonomy-4` == "Syndiniales")
grph.neg_synd_edges <- E(grph.neg)[from(grph.neg_df_vert_synd$name)]
grph.neg.synd_subgraph <- subgraph.edges(grph.neg, eids = grph.neg_synd_edges)
grph.neg_df_vert %>%
  filter(
    name %in% V(grph.neg.synd_subgraph)$name,
    !(`taxonomy-4` %in% "Syndiniales")
  ) %>%
  count(`taxonomy-2`, `taxonomy-3`, `taxonomy-4`, `taxonomy-5`,
        name = "count", sort = TRUE)

# Negative associations: Spumellarida
grph.neg_df_vert_spum <- grph.neg_df_vert %>%
  filter(`taxonomy-5` == "Spumellarida")
grph.neg_spum_edges <- E(grph.neg)[from(grph.neg_df_vert_spum$name)]
grph.neg.spum_subgraph <- subgraph.edges(grph.neg, eids = grph.neg_spum_edges)
grph.neg_df_vert %>%
  filter(
    name %in% V(grph.neg.spum_subgraph)$name,
    !(`taxonomy-5` %in% "Spumellarida")
  ) %>%
  count(`taxonomy-2`, `taxonomy-3`, `taxonomy-4`, `taxonomy-5`,
        name = "count", sort = TRUE)

# Also check out Cariacotrichea to hypothesize about possible symbiosis partners
# Positive associations only
grph.pos_df_vert_cari <- grph.pos_df_vert %>%
  filter(`taxonomy-4` == "Cariacotrichea")
grph.pos_cari_edges <- E(grph.pos)[from(grph.pos_df_vert_cari$name)]
grph.pos.cari_subgraph <- subgraph.edges(grph.pos, eids = grph.pos_cari_edges)
grph.pos_df_vert %>%
  filter(
    name %in% V(grph.pos.cari_subgraph)$name,
    !(`taxonomy-4` %in% "Cariacotrichea")
  ) %>%
  count(`taxonomy-2`, `taxonomy-3`, `taxonomy-4`, `taxonomy-5`,
        name = "count", sort = TRUE)
Calculate five parameters for each individual node (degree, strength, betweenness, closeness, and local clustering coefficient):
# First change the weights of the edges (the strength of association) to their
# absolute value. This won't work if negative edge weights are left with the
# negative signs.
# NOTE: this overwrites the weights stored on grph.top300, so the sign of each
# association can no longer be read from this graph after this line.
E(grph.top300)$weight <- abs(E(grph.top300)$weight)
# calculate per-node parameters (one value per vertex of grph.top300)
names <- V(grph.top300)$name
de <- degree(grph.top300)
# strength() is the current name of the deprecated graph.strength() alias
st <- strength(grph.top300)
be <- betweenness(grph.top300, normalized = TRUE)
# igraph warns here that closeness is not well-defined for disconnected graphs
cc <- closeness(grph.top300)
At centrality.c:2617 :closeness centrality is not well-defined for disconnected graphs
# local clustering coefficient (transitivity) of each node
l.cluster <- transitivity(grph.top300, type = "local")
# assemble dataset and match full taxonomy
fulldateset_node_measures <- data.frame(
  ID = names,
  degree = de,
  strength = st,
  betweenness = be,
  closeness = cc,
  clustering_coefficient = l.cluster
)
# The node table is sliced by position into three consecutive row blocks:
# bacteria first, then archaea, then eukaryotes. Each block has as many rows
# as the corresponding OTU table. seq_len() keeps a block empty (instead of
# counting backwards) if one of the tables has no rows.
n_bac <- nrow(otu_table(ps_bac_pruned_3domains_top100))
n_arch <- nrow(otu_table(ps_arch_pruned_3domains_top100))
n_euk <- nrow(otu_table(ps_euk_pruned_3domains_top100))
bac_rows <- seq_len(n_bac)
arch_rows <- n_bac + seq_len(n_arch)
euk_rows <- n_bac + n_arch + seq_len(n_euk)
# Put back bac taxonomy
temp1 <- left_join(
  fulldateset_node_measures[bac_rows, ],
  bac_taxonomy,
  by = c("ID" = "#OTU ID")
)
# delete "taxonomy-9" and "Refined taxonomy" columns
temp1 <- select(temp1, -all_of(c("taxonomy-9", "Refined taxonomy")))
# Put back arch taxonomy
temp2 <- left_join(
  fulldateset_node_measures[arch_rows, ],
  arch_taxonomy,
  by = c("ID" = "#OTU ID")
)
# Put back euk taxonomy
temp3 <- left_join(
  fulldateset_node_measures[euk_rows, ],
  euk_taxonomy,
  by = c("ID" = "#ASV ID")
)
# Rename col names to match those from Bac and Arch
temp3 <- temp3 %>%
  rename(
    "taxonomy-1" = Kingdom,
    "taxonomy-2" = Supergroup,
    "taxonomy-3" = Division,
    "taxonomy-4" = Class,
    "taxonomy-5" = Order,
    "taxonomy-6" = Family,
    "taxonomy-7" = Genus,
    "taxonomy-8" = Species
  )
# combine back all 3 domains into a single data frame
fulldateset_node_measures <- rbind(temp1, temp2, temp3)
fulldateset_node_measures
Plot betweenness vs degree for each node.
- Tipton et al. argue that nodes with high betweenness are “bottlenecks” or important connectors and nodes with high degree are “hubs”
- Berry et al. argue that nodes with low betweenness, high degree, high closeness, and high transitivity are candidate keystone species
- Add closeness to the node’s plotly label, since these values don’t vary much node-to-node and wouldn’t make sense to plot
# Clean up the node table before plotting:
# - replace NaN clustering coefs with 0. This is done on that column only, so
#   it stays numeric; a blanket replacement of every NA with a string would
#   also hit the NaN values and turn the whole column into character.
# - replace NA in the taxonomy (character columns) with "unidentified"
# - remove nodes with 0 betweenness (can't calculate log10 of 0)
fulldateset_node_measures <- fulldateset_node_measures %>%
  mutate(
    clustering_coefficient = replace(
      clustering_coefficient,
      is.na(clustering_coefficient),
      0
    ),
    across(where(is.character), ~ replace_na(.x, "unidentified"))
  ) %>%
  filter(betweenness != 0)
# get enough colors (one per taxonomy-3 group) and randomly rearrange them so
# they are easier to separate on the plot; the seed keeps the order reproducible
mycolors <- colorRampPalette(brewer.pal(12, "Paired"))(
  length(unique(fulldateset_node_measures$`taxonomy-3`))
)
set.seed(123)
mycolors <- sample(mycolors)
# plot with plotly so I can hover over points and determine which taxa they are.
# ID and the taxonomy-2/4/5 entries in aes() are not drawn; they are only
# carried along so ggplotly() can show them in the tooltip.
p <- ggplot(
  fulldateset_node_measures,
  aes(
    x = degree,
    y = betweenness,
    ID = ID,
    shape = `taxonomy-1`,
    `taxonomy-2` = `taxonomy-2`,
    color = `taxonomy-3`,
    `taxonomy-4` = `taxonomy-4`,
    `taxonomy-5` = `taxonomy-5`
  )
) +
  geom_point(size = 4) +
  scale_y_continuous(trans = "log10") +
  scale_color_manual(values = mycolors) +
  # theme_bw() is a complete theme: it must come before theme(), otherwise it
  # resets the legend-title setting below
  theme_bw() +
  theme(legend.title = element_blank())
p
ggplotly(p, tooltip = c("ID", "taxonomy-2", "taxonomy-3", "taxonomy-4", "taxonomy-5"))
NA
Make static figure for manuscript
There were 26 warnings (use warnings() to see them)
# Static version of the betweenness vs degree plot for the manuscript:
# shape encodes taxonomy-1, color encodes taxonomy-3, y axis on a log10 scale.
p2 <- ggplot(
  fulldateset_node_measures,
  aes(x = degree, y = betweenness, shape = `taxonomy-1`, color = `taxonomy-3`)
) +
  geom_point(size = 4) +
  scale_y_continuous(trans = "log10") +
  scale_color_manual(values = mycolors, name = "") +
  scale_shape_manual(values = c(19, 17, 15, 18), name = "") +
  # theme_bw() is a complete theme and replaces every theme setting added
  # before it, so it has to come first for the tweaks below to take effect
  theme_bw() +
  theme(
    legend.title = element_blank(),
    axis.text = element_text(size = 8),
    axis.text.x = element_text(size = 8, angle = 45, hjust = 1),
    axis.title = element_text(size = 8),
    legend.text = element_text(size = 8),
    strip.text = element_text(size = 8),
    legend.margin = margin(0, 0, 0, 2),
    legend.box.margin = margin(-10, -10, -10, -10),
    plot.margin = grid::unit(c(0, 0, 0, 0), "mm")
  )
# Turn the per-node degree vector into a data frame, keep the node names as a
# column, and attach the columns of all_temp by node name
de_df <- as.data.frame(de)
de_df[["name"]] <- rownames(de_df)
de_df <- de_df %>%
  left_join(all_temp, by = "name")

# Total degree per group of one taxonomy column, largest total first
sum_degree_by <- function(node_degrees, level) {
  node_degrees %>%
    group_by(across(all_of(level))) %>%
    summarise(degreesum = sum(de)) %>%
    arrange(desc(degreesum))
}

# One summary table per taxonomic level
sum_degree_by(de_df, "taxonomy-1")
sum_degree_by(de_df, "taxonomy-2")
sum_degree_by(de_df, "taxonomy-3")
sum_degree_by(de_df, "taxonomy-4")
sum_degree_by(de_df, "taxonomy-5")
NA
To include in main text